diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 500b62d..9892aee 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -28,6 +28,7 @@ A platform to run and host applications, with a focus on Python applications. ## Code Generation & Style - **Python**: Use modern Python 3.14+ features. MUST include strict type hints. Follow PEP 8 (120-char line length). Use double quotes for strings. +- **Python Types**: NEVER use `object` as a type hint. Use precise concrete types, Protocols, or named type aliases instead. - **Ruff**: Respect strict Ruff config in `pyproject.toml` (`force-single-line = true` for imports). Do not rely on auto-removal for unused variables (`F841` is unfixable); fix manually. - **Django**: - Prefer MTV with fat models and thin views. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 94d8423..204ba09 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,4 +37,26 @@ uv run python manage.py check uv run pytest -n 5 -q uv run ruff check . --fix uv run ruff format . +# Start Celery workers +uv run celery -A config worker -l info ``` + +### 4 Local test deployment flow + +Set `DJANGO_SECRET_KEY` before running Django management commands. + +```bash +export DJANGO_SECRET_KEY="dev-only-secret" +uv run python manage.py create_test_deployment +``` + +The command creates a randomized tenant and hosted site, provisions PostgreSQL and Redis test containers, +builds a reusable local Django test image, and prints a localhost sentinel URL when the deployment reaches +`running`. + +Open `/` or `/deployments/` in the Django web UI to inspect recent deployments, runtime service states, +live sentinel health, and the latest captured Podman log snapshots for Django, PostgreSQL, and Redis. + +Use `--no-wait` only when you have a real cross-process Celery broker configured through +`TUSSILAGO_CELERY_BROKER_URL` and a worker process running. 
`memory://` is not valid for this mode because a +separate worker cannot consume in-memory tasks from another process. diff --git a/config/__init__.py b/config/__init__.py index e69de29..3eb91a6 100644 --- a/config/__init__.py +++ b/config/__init__.py @@ -0,0 +1,3 @@ +from config.celery import app as celery_app + +__all__ = ("celery_app",) diff --git a/config/celery.py b/config/celery.py new file mode 100644 index 0000000..b026f5c --- /dev/null +++ b/config/celery.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +import os + +from celery import Celery + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings") + +app = Celery("tussilago") +app.config_from_object("django.conf:settings", namespace="CELERY") +app.autodiscover_tasks() diff --git a/config/checks.py b/config/checks.py index e8f83cc..082b4a8 100644 --- a/config/checks.py +++ b/config/checks.py @@ -39,9 +39,7 @@ def check_required_dev_commands(*_: object, **__: object) -> list[CheckMessage]: if not (settings.DEBUG or getattr(settings, "TESTING", False)): return [] - missing_commands: list[str] = [ - command for command in REQUIRED_DEV_COMMANDS if shutil.which(command) is None - ] + missing_commands: list[str] = [command for command in REQUIRED_DEV_COMMANDS if shutil.which(command) is None] if not missing_commands: return [] diff --git a/config/dev_autoreload.py b/config/dev_autoreload.py new file mode 100644 index 0000000..2490e68 --- /dev/null +++ b/config/dev_autoreload.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import django_watchfiles +from django.utils import autoreload + +if TYPE_CHECKING: + from collections.abc import Iterable + from collections.abc import Sequence + + from watchfiles import Change + +IGNORED_PROJECT_ROOT_NAMES: frozenset[str] = frozenset({ + ".git", + ".venv", + "__pycache__", + "firecracker", +}) + + +def _resolve_path(path: Path) -> Path: + try: + return path.resolve() + except 
OSError: + return path.absolute() + + +def _is_relative_to(path: Path, parent: Path) -> bool: + try: + path.relative_to(parent) + except ValueError: + return False + return True + + +def build_project_watch_roots( + project_root: Path, + *, + ignored_root_names: Sequence[str] = tuple(IGNORED_PROJECT_ROOT_NAMES), +) -> tuple[Path, ...]: + """Return top-level project directories worth watching during development.""" + ignored_names = set(ignored_root_names) + resolved_project_root = _resolve_path(project_root) + return tuple( + sorted( + _resolve_path(child) + for child in resolved_project_root.iterdir() + if child.is_dir() and not child.name.startswith(".") and child.name not in ignored_names + ), + ) + + +class TussilagoWatchfilesReloader(django_watchfiles.WatchfilesReloader): + """Use child directories instead of repo root for dev autoreload watches.""" + + def __init__( + self, + *, + project_root: Path, + ignored_paths: Sequence[Path] | None = None, + ) -> None: + """Store project-specific watch settings before watcher startup.""" + self.project_root = _resolve_path(project_root) + self.ignored_paths = tuple( + _resolve_path(path) for path in (ignored_paths or (self.project_root / "firecracker",)) + ) + self.project_watch_roots = build_project_watch_roots(self.project_root) + super().__init__() + + def file_filter(self, change: Change, filename: str) -> bool: + """Drop file events from ignored paths such as the firecracker tree. + + Returns: + True when the file event should still be watched. + """ + resolved_path = _resolve_path(Path(filename)) + if any(_is_relative_to(resolved_path, ignored_path) for ignored_path in self.ignored_paths): + return False + return super().file_filter(change, filename) + + def watched_roots(self, watched_files: Iterable[Path]) -> frozenset[Path]: + """Replace repo-root watches with top-level child directories. + + Returns: + Watch roots with the project root expanded into child directories. 
+ """ + roots = {_resolve_path(root) for root in super().watched_roots(watched_files)} + filtered_roots = { + root + for root in roots + if not any(_is_relative_to(root, ignored_path) for ignored_path in self.ignored_paths) + } + if self.project_root in filtered_roots: + filtered_roots.remove(self.project_root) + filtered_roots.update(self.project_watch_roots) + return frozenset(filtered_roots) + + +def install_watchfiles_reloader_patch(*, project_root: Path) -> None: + """Install project-specific watchfiles reloader for dev and test runtimes.""" + resolved_project_root = _resolve_path(project_root) + + class ProjectWatchfilesReloader(TussilagoWatchfilesReloader): + def __init__(self) -> None: + super().__init__(project_root=resolved_project_root) + + def replaced_get_reloader() -> autoreload.BaseReloader: + return ProjectWatchfilesReloader() + + autoreload.get_reloader = replaced_get_reloader diff --git a/config/settings.py b/config/settings.py index f8e40d5..bb77dbb 100644 --- a/config/settings.py +++ b/config/settings.py @@ -70,6 +70,7 @@ if DEBUG: INSTALLED_APPS: list[str] = [ "config.apps.TussilagoConfig", + "control_plane.apps.ControlPlaneConfig", "django.contrib.admin", "django.contrib.auth", "django.contrib.contenttypes", @@ -122,6 +123,24 @@ DATABASES: dict[str, dict[str, str | Path | dict[str, str | int]]] = { }, } +DISABLE_SERVER_SIDE_CURSORS: bool = True + +CELERY_BROKER_URL: str = os.getenv( + "TUSSILAGO_CELERY_BROKER_URL", + "memory://" if DEBUG or TESTING else "", +) +CELERY_RESULT_BACKEND: str = os.getenv( + "TUSSILAGO_CELERY_RESULT_BACKEND", + "cache+memory://" if DEBUG or TESTING else "", +) +CELERY_ACCEPT_CONTENT: list[str] = ["json"] +CELERY_TASK_SERIALIZER: str = "json" +CELERY_RESULT_SERIALIZER: str = "json" +CELERY_TIMEZONE: str = TIME_ZONE +CELERY_TASK_ALWAYS_EAGER: bool = TESTING +CELERY_TASK_EAGER_PROPAGATES: bool = TESTING +CELERY_TASK_DEFAULT_QUEUE: str = "control-plane" + AUTH_PASSWORD_VALIDATORS: list[dict[str, str]] = [ { "NAME": 
"django.contrib.auth.password_validation.UserAttributeSimilarityValidator", @@ -186,5 +205,9 @@ if DEBUG or TESTING: "django_browser_reload", )) + from config.dev_autoreload import install_watchfiles_reloader_patch + + install_watchfiles_reloader_patch(project_root=BASE_DIR) + # Customize the config to support htmx boosting. DEBUG_TOOLBAR_CONFIG: dict[str, str] = {"ROOT_TAG_EXTRA_ATTRS": "hx-preserve"} diff --git a/config/urls.py b/config/urls.py index 078a974..d30c0b6 100644 --- a/config/urls.py +++ b/config/urls.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from django.urls.resolvers import URLResolver urlpatterns: list[URLPattern | URLResolver] = [ + path(route="", view=include(arg="control_plane.urls")), path(route="admin/", view=admin.site.urls), ] diff --git a/conftest.py b/conftest.py index 294409d..16f7a68 100644 --- a/conftest.py +++ b/conftest.py @@ -1,3 +1,4 @@ +import os from typing import TYPE_CHECKING from typing import Any @@ -13,3 +14,17 @@ def use_zeal() -> Generator[None, Any]: """Enable Zeal N+1 detection context for each pytest test.""" with zeal.zeal_context(): yield + + +def pytest_configure(config: pytest.Config) -> None: + """Register local markers used by opt-in host smoke coverage.""" + config.addinivalue_line( + "markers", + "host_smoke: opt-in host-level smoke tests that spawn real local processes", + ) + + +@pytest.fixture +def host_smoke_enabled() -> bool: + """Return whether opt-in host smoke coverage should run.""" + return os.getenv("TUSSILAGO_RUN_HOST_SMOKE", "0") == "1" diff --git a/control_plane/__init__.py b/control_plane/__init__.py new file mode 100644 index 0000000..6f5afbe --- /dev/null +++ b/control_plane/__init__.py @@ -0,0 +1 @@ +"""Control-plane models and runtime helpers for hosted deployments.""" diff --git a/control_plane/admin.py b/control_plane/admin.py new file mode 100644 index 0000000..51afcec --- /dev/null +++ b/control_plane/admin.py @@ -0,0 +1,223 @@ +from __future__ import annotations + +from typing import 
TYPE_CHECKING + +from django.contrib import admin +from django.contrib import messages +from django.db.models import Count +from django.db.models import F + +from control_plane.models import Deployment +from control_plane.models import HostedSite +from control_plane.models import RuntimeService +from control_plane.models import RuntimeServiceKind +from control_plane.models import Tenant +from control_plane.tasks import provision_test_runtime_services + +if TYPE_CHECKING: + from django.db.models import QuerySet + from django.http import HttpRequest + + RuntimeServiceInlineBase = admin.StackedInline[RuntimeService] + TenantAdminBase = admin.ModelAdmin[Tenant] + HostedSiteAdminBase = admin.ModelAdmin[HostedSite] + DeploymentAdminBase = admin.ModelAdmin[Deployment] + RuntimeServiceAdminBase = admin.ModelAdmin[RuntimeService] +else: + RuntimeServiceInlineBase = admin.StackedInline + TenantAdminBase = admin.ModelAdmin + HostedSiteAdminBase = admin.ModelAdmin + DeploymentAdminBase = admin.ModelAdmin + RuntimeServiceAdminBase = admin.ModelAdmin + + +class RuntimeServiceInline(RuntimeServiceInlineBase): + """Allow deployment admins to create/edit related runtime services inline.""" + + model = RuntimeService + extra = 0 + max_num = len(RuntimeServiceKind) + show_change_link = True + + +@admin.register(Tenant) +class TenantAdmin(TenantAdminBase): + """Expose tenants for admin-managed smoke data setup.""" + + list_display = ("slug", "display_name") + search_fields = ("slug", "display_name") + ordering = ("slug",) + + +@admin.register(HostedSite) +class HostedSiteAdmin(HostedSiteAdminBase): + """Expose hosted sites so admins can build deployment test graphs.""" + + list_display = ("slug", "display_name", "tenant_slug", "service_port") + list_filter = ("tenant",) + search_fields = ( + "slug", + "display_name", + "tenant__slug", + "tenant__display_name", + "wsgi_module", + ) + ordering = ("tenant__slug", "slug") + autocomplete_fields = ("tenant",) + list_select_related = 
("tenant",) + + def get_queryset(self, request: HttpRequest) -> QuerySet[HostedSite]: + """Load tenant slug values for changelist rendering. + + Returns: + Hosted site queryset with tenant join and tenant slug annotation. + """ + return ( + super() + .get_queryset(request) + .select_related("tenant") + .annotate( + tenant_slug_value=F("tenant__slug"), + ) + ) + + @admin.display(ordering="tenant__slug", description="Tenant") + def tenant_slug(self, hosted_site: HostedSite) -> str: + """Return tenant slug for changelist display and sorting.""" + return str(vars(hosted_site)["tenant_slug_value"]) + + +@admin.register(Deployment) +class DeploymentAdmin(DeploymentAdminBase): + """Expose deployments and queue test container provisioning.""" + + list_display = ( + "id", + "status", + "tenant_slug", + "site_slug", + "idempotency_key", + "guest_port", + "runtime_service_total", + ) + list_filter = ("status",) + search_fields = ( + "=id", + "idempotency_key", + "firecracker_vm_id", + "hosted_site__slug", + "hosted_site__tenant__slug", + ) + ordering = ("hosted_site__tenant__slug", "hosted_site__slug", "-created_at") + autocomplete_fields = ("hosted_site",) + list_select_related = ("hosted_site__tenant",) + inlines = (RuntimeServiceInline,) + actions = ("create_test_containers",) + + def get_queryset(self, request: HttpRequest) -> QuerySet[Deployment]: + """Load related hosted site and tenant rows for admin rendering. + + Returns: + Deployment queryset with hosted site and tenant joined. 
+ """ + return ( + super() + .get_queryset(request) + .select_related("hosted_site__tenant") + .annotate( + tenant_slug_value=F("hosted_site__tenant__slug"), + site_slug_value=F("hosted_site__slug"), + runtime_service_total_value=Count("runtime_services", distinct=True), + ) + ) + + @admin.display(ordering="hosted_site__tenant__slug", description="Tenant") + def tenant_slug(self, deployment: Deployment) -> str: + """Return tenant slug for changelist display and sorting.""" + return str(vars(deployment)["tenant_slug_value"]) + + @admin.display(ordering="hosted_site__slug", description="Site") + def site_slug(self, deployment: Deployment) -> str: + """Return hosted site slug for changelist display and sorting.""" + return str(vars(deployment)["site_slug_value"]) + + @admin.display(description="Runtime services") + def runtime_service_total(self, deployment: Deployment) -> int: + """Return total runtime services currently linked to a deployment.""" + return int(vars(deployment)["runtime_service_total_value"]) + + @admin.action(description="Queue test container provisioning") + def create_test_containers( + self, + request: HttpRequest, + queryset: QuerySet[Deployment], + ) -> None: + """Queue Celery jobs that seed and provision local test containers.""" + deployment_ids = [str(deployment_id) for deployment_id in queryset.values_list("id", flat=True)] + for deployment_id in deployment_ids: + provision_test_runtime_services.delay(deployment_id) + + self.message_user( + request, + ( + f"Queued test container provisioning for {len(deployment_ids)} deployments. " + "Run a Celery worker to execute queued jobs." 
+ ), + level=messages.SUCCESS, + ) + + +@admin.register(RuntimeService) +class RuntimeServiceAdmin(RuntimeServiceAdminBase): + """Expose runtime service containers to Django admin users.""" + + list_display = ( + "container_name", + "kind", + "status", + "tenant_slug", + "site_slug", + "internal_port", + ) + list_filter = ("kind", "status") + search_fields = ( + "container_name", + "network_name", + "hostname", + "deployment__idempotency_key", + "deployment__hosted_site__slug", + "deployment__hosted_site__tenant__slug", + ) + ordering = ( + "deployment__hosted_site__tenant__slug", + "deployment__hosted_site__slug", + "kind", + ) + autocomplete_fields = ("deployment",) + list_select_related = ("deployment__hosted_site__tenant",) + + def get_queryset(self, request: HttpRequest) -> QuerySet[RuntimeService]: + """Load related deployment context for changelist rendering. + + Returns: + Runtime service queryset with deployment, site, and tenant joined. + """ + return ( + super() + .get_queryset(request) + .select_related("deployment__hosted_site__tenant") + .annotate( + tenant_slug_value=F("deployment__hosted_site__tenant__slug"), + site_slug_value=F("deployment__hosted_site__slug"), + ) + ) + + @admin.display(ordering="deployment__hosted_site__tenant__slug", description="Tenant") + def tenant_slug(self, runtime_service: RuntimeService) -> str: + """Return tenant slug for changelist display and sorting.""" + return str(vars(runtime_service)["tenant_slug_value"]) + + @admin.display(ordering="deployment__hosted_site__slug", description="Site") + def site_slug(self, runtime_service: RuntimeService) -> str: + """Return hosted site slug for changelist display and sorting.""" + return str(vars(runtime_service)["site_slug_value"]) diff --git a/control_plane/apps.py b/control_plane/apps.py new file mode 100644 index 0000000..bdf369a --- /dev/null +++ b/control_plane/apps.py @@ -0,0 +1,8 @@ +from django.apps import AppConfig + + +class ControlPlaneConfig(AppConfig): + """Register 
control-plane models and task discovery.""" + + name = "control_plane" + verbose_name = "Tussilago Control Plane" diff --git a/control_plane/container_assets/test_django/Containerfile b/control_plane/container_assets/test_django/Containerfile new file mode 100644 index 0000000..cccd84f --- /dev/null +++ b/control_plane/container_assets/test_django/Containerfile @@ -0,0 +1,13 @@ +FROM docker.io/library/python:3.14-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 + +RUN python -m pip install --no-cache-dir \ + "django>=6.0.4" \ + "gunicorn>=23.0.0" \ + "psycopg[binary]>=3.2.9" \ + "redis>=6.0.0" + +WORKDIR /srv/test-app diff --git a/control_plane/host_commands.py b/control_plane/host_commands.py new file mode 100644 index 0000000..eb49f03 --- /dev/null +++ b/control_plane/host_commands.py @@ -0,0 +1,171 @@ +from __future__ import annotations + +import logging +import os +import shlex +import subprocess # noqa: S404 +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Mapping + from collections.abc import Sequence + from pathlib import Path + +logger = logging.getLogger("tussilago.control_plane.host_commands") + +DEFAULT_INHERITED_ENV_KEYS: frozenset[str] = frozenset( + { + "HOME", + "LANG", + "LC_ALL", + "LC_CTYPE", + "LOGNAME", + "PATH", + "SSL_CERT_DIR", + "SSL_CERT_FILE", + "TMPDIR", + "USER", + "UV_CACHE_DIR", + "VIRTUAL_ENV", + "XDG_CACHE_HOME", + "XDG_RUNTIME_DIR", + }, +) + + +@dataclass(frozen=True, slots=True) +class HostCommandResult: + """Capture output from a completed host-side command.""" + + args: tuple[str, ...] 
+ returncode: int + stdout: str + stderr: str + + +class HostCommandError(RuntimeError): + """Raised when a host-side command fails or times out.""" + + def __init__( + self, + message: str, + *, + args: Sequence[str], + returncode: int | None, + stdout: str, + stderr: str, + ) -> None: + """Store captured command context for later error reporting.""" + super().__init__(message) + self.command_args = tuple(args) + self.returncode = returncode + self.stdout = stdout + self.stderr = stderr + + +def build_host_command_env( + *, + env_overrides: Mapping[str, str] | None = None, + allowed_env_keys: frozenset[str] | None = None, + inherited_env_keys: frozenset[str] = DEFAULT_INHERITED_ENV_KEYS, +) -> dict[str, str]: + """Build a sanitized environment for host-side child processes. + + Returns: + A filtered environment dictionary suitable for subprocess execution. + + Raises: + ValueError: If env overrides are provided without an allowlist. + """ + resolved_env = {key: value for key, value in os.environ.items() if key in inherited_env_keys} + + if env_overrides is None: + return resolved_env + + if allowed_env_keys is None: + msg = "allowed_env_keys is required when env_overrides are provided" + raise ValueError(msg) + + disallowed_keys = sorted(set(env_overrides).difference(allowed_env_keys)) + if disallowed_keys: + msg = f"env_overrides contains disallowed keys: {', '.join(disallowed_keys)}" + raise ValueError(msg) + + resolved_env.update(env_overrides) + return resolved_env + + +def run_host_command( + *, + command: Sequence[str], + cwd: Path | None = None, + env_overrides: Mapping[str, str] | None = None, + allowed_env_keys: frozenset[str] | None = None, + timeout_seconds: float = 60.0, +) -> HostCommandResult: + """Run a host-side command with explicit environment and timeout controls. + + Returns: + A result object containing the command, return code, and captured output. + + Raises: + ValueError: If the command is empty or env overrides are not allowlisted. 
+ HostCommandError: If the command fails or times out. + """ + normalized_command = tuple(command) + if not normalized_command: + msg = "command must not be empty" + raise ValueError(msg) + + if any(not argument for argument in normalized_command): + msg = "command arguments must be non-empty strings" + raise ValueError(msg) + + resolved_env = build_host_command_env( + env_overrides=env_overrides, + allowed_env_keys=allowed_env_keys, + ) + + logger.debug( + "Running host command executable=%s argc=%s (cwd=%s)", + shlex.quote(normalized_command[0]), + len(normalized_command), + cwd, + ) + + try: + completed = subprocess.run( # noqa: S603 + normalized_command, + check=True, + capture_output=True, + text=True, + cwd=cwd, + env=resolved_env, + timeout=timeout_seconds, + ) + except subprocess.CalledProcessError as error: + msg_0 = "Host command failed." + raise HostCommandError( + msg_0, + args=tuple(str(argument) for argument in error.cmd), + returncode=error.returncode, + stdout=error.stdout or "", + stderr=error.stderr or "", + ) from error + except subprocess.TimeoutExpired as error: + msg_0 = "Host command timed out." 
+ raise HostCommandError( + msg_0, + args=normalized_command, + returncode=None, + stdout=str(error.stdout) or "", + stderr=str(error.stderr) or "", + ) from error + + return HostCommandResult( + args=normalized_command, + returncode=completed.returncode, + stdout=completed.stdout, + stderr=completed.stderr, + ) diff --git a/control_plane/local_test_deployment.py b/control_plane/local_test_deployment.py new file mode 100644 index 0000000..6a93f48 --- /dev/null +++ b/control_plane/local_test_deployment.py @@ -0,0 +1,212 @@ +from __future__ import annotations + +import hashlib +import secrets +import socket +import time +from dataclasses import dataclass + +from celery import chain +from django.conf import settings +from django.db import transaction +from django.utils import timezone + +from control_plane.host_commands import HostCommandError +from control_plane.local_test_runtime import build_test_django_local_url +from control_plane.models import Deployment +from control_plane.models import DeploymentStatus +from control_plane.models import HostedSite +from control_plane.models import Tenant +from control_plane.observability import capture_test_deployment_diagnostics +from control_plane.tasks import mark_deployment_booting +from control_plane.tasks import mark_deployment_provisioning +from control_plane.tasks import provision_test_django_runtime +from control_plane.tasks import provision_test_runtime_services +from control_plane.tasks import run_test_django_runtime_provisioning + + +@dataclass(frozen=True, slots=True) +class CreatedTestDeployment: + """Bundle control-plane rows created for one local test deployment.""" + + tenant: Tenant + hosted_site: HostedSite + deployment: Deployment + + @property + def sentinel_url(self) -> str: + """Return published local sentinel URL for this deployment.""" + return build_test_django_local_url(self.deployment) + + +def create_test_deployment() -> CreatedTestDeployment: + """Create a randomized tenant, hosted site, and 
deployment for local testing. + + Returns: + Newly created tenant, hosted site, and deployment rows. + """ + tenant_token = secrets.token_hex(4) + site_token = secrets.token_hex(4) + tenant_slug = f"tenant-{tenant_token}" + site_slug = f"site-{site_token}" + idempotency_key = f"test-deploy-{secrets.token_hex(8)}" + guest_port = _find_free_port() + source_sha256 = hashlib.sha256( + f"{tenant_slug}:{site_slug}:{idempotency_key}".encode(), + ).hexdigest() + + with transaction.atomic(): + tenant = Tenant.objects.create( + slug=tenant_slug, + display_name=f"Test Tenant {tenant_token.upper()}", + ) + hosted_site = HostedSite.objects.create( + tenant=tenant, + slug=site_slug, + display_name=f"Test Site {site_token.upper()}", + wsgi_module="tenant_site.wsgi:application", + service_port=guest_port, + ) + deployment = Deployment.objects.create( + hosted_site=hosted_site, + idempotency_key=idempotency_key, + source_sha256=source_sha256, + guest_port=guest_port, + ) + + return CreatedTestDeployment( + tenant=tenant, + hosted_site=hosted_site, + deployment=deployment, + ) + + +def queue_test_deployment_provisioning(deployment_id: str) -> str: + """Queue full local test deployment Celery chain and return task id. + + Returns: + Celery task id for the queued orchestration chain. + """ + _ensure_async_broker_configuration() + result = chain( + mark_deployment_provisioning.si(deployment_id), + provision_test_runtime_services.si(deployment_id), + mark_deployment_booting.si(deployment_id), + provision_test_django_runtime.si(deployment_id), + ).apply_async() + return str(result.id) + + +def wait_for_test_deployment( + deployment_id: str, + *, + timeout_seconds: float, + poll_interval_seconds: float, +) -> Deployment: + """Wait until a queued local test deployment becomes running or fails. + + Returns: + Deployment row in running state. + + Raises: + RuntimeError: If deployment reaches failed state. + TimeoutError: If deployment does not finish before timeout. 
+ """ + deadline = time.monotonic() + timeout_seconds + while True: + deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id) + if deployment.status == DeploymentStatus.RUNNING.value: + return deployment + if deployment.status == DeploymentStatus.FAILED.value: + failure_message = deployment.last_error or "Local test deployment failed." + raise RuntimeError(failure_message) + if time.monotonic() >= deadline: + msg = ( + "Timed out waiting for local test deployment " + f"{deployment.id} to become ready. Current status: {deployment.status}." + ) + raise TimeoutError(msg) + + time.sleep(poll_interval_seconds) + + +def provision_test_deployment(deployment_id: str) -> Deployment: + """Run full local test deployment provisioning inline in the current process. + + Returns: + Deployment row after provisioning completes. + + Raises: + RuntimeError: If runtime provisioning fails. + TimeoutError: If the Django sentinel endpoint never becomes ready. + ValueError: If runtime configuration is invalid. 
+ """ + try: + mark_deployment_provisioning.run(deployment_id) + provision_test_runtime_services.run(deployment_id) + mark_deployment_booting.run(deployment_id) + run_test_django_runtime_provisioning(deployment_id) + except HostCommandError as error: + message = _build_host_command_failure_message(error) + _mark_inline_deployment_failed(deployment_id, message=message) + _capture_test_deployment_diagnostics_snapshot(deployment_id) + raise RuntimeError(message) from error + except (RuntimeError, TimeoutError, ValueError) as error: + _mark_inline_deployment_failed(deployment_id, message=str(error)) + _capture_test_deployment_diagnostics_snapshot(deployment_id) + raise + + return Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id) + + +def _ensure_async_broker_configuration() -> None: + broker_url = settings.CELERY_BROKER_URL + if not broker_url: + msg = "Async queueing requires TUSSILAGO_CELERY_BROKER_URL to be set to a real broker URL." + raise RuntimeError(msg) + + if broker_url == "memory://": + msg = ( + "Async queueing cannot use memory:// because the worker cannot consume tasks from another process. " + "Set TUSSILAGO_CELERY_BROKER_URL to a real broker such as Redis or RabbitMQ." 
+ ) + raise RuntimeError(msg) + + +def _mark_inline_deployment_failed(deployment_id: str, *, message: str) -> None: + deployment = Deployment.objects.get(pk=deployment_id) + if deployment.status == DeploymentStatus.FAILED.value: + return + + deployment.status = DeploymentStatus.FAILED.value + deployment.last_error = message + deployment.finished_at = timezone.now() + deployment.save(update_fields=["status", "last_error", "finished_at", "updated_at"]) + + +def _build_host_command_failure_message(error: HostCommandError) -> str: + lines = [str(error)] + if error.stderr.strip(): + lines.append(error.stderr.strip()) + elif error.stdout.strip(): + lines.append(error.stdout.strip()) + + return "\n".join(lines) + + +def _capture_test_deployment_diagnostics_snapshot(deployment_id: str) -> None: + try: + capture_test_deployment_diagnostics(deployment_id) + except OSError: + return + except ValueError: + return + except Deployment.DoesNotExist: + return + + +def _find_free_port() -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe: + probe.bind(("127.0.0.1", 0)) + probe.listen(1) + return int(probe.getsockname()[1]) diff --git a/control_plane/local_test_runtime.py b/control_plane/local_test_runtime.py new file mode 100644 index 0000000..0564d91 --- /dev/null +++ b/control_plane/local_test_runtime.py @@ -0,0 +1,297 @@ +from __future__ import annotations + +from pathlib import Path +from textwrap import dedent +from typing import TYPE_CHECKING + +from django.conf import settings + +from control_plane.models import RuntimeServiceKind +from control_plane.models import _build_limited_identifier + +if TYPE_CHECKING: + from collections.abc import Iterable + + from control_plane.models import Deployment + from control_plane.models import RuntimeService + + +TEST_DJANGO_CONTAINER_PORT = 8000 +TEST_DJANGO_IMAGE_REFERENCE = "localhost/tussilago-test-django:latest" +TEST_DJANGO_WORKDIR = "/srv/test-app" +TEST_POSTGRES_AUTH_DIR = "/run/postgres-auth" 
+TEST_REDIS_AUTH_DIR = "/run/redis-auth" +TEST_POSTGRES_PASSWORD_FILE = f"{TEST_POSTGRES_AUTH_DIR}/password" +TEST_REDIS_PASSWORD_FILE = f"{TEST_REDIS_AUTH_DIR}/password" + + +def build_test_django_project_root(deployment: Deployment) -> Path: + """Return filesystem root for one generated local Django test app.""" + return Path(settings.DATA_DIR) / "test-deployments" / str(deployment.id) / "django-app" + + +def build_test_django_image_reference() -> str: + """Return Podman image reference for the reusable local Django runtime.""" + return TEST_DJANGO_IMAGE_REFERENCE + + +def build_test_django_containerfile_path() -> Path: + """Return checked-in Containerfile used for local Django test runtimes.""" + return Path(__file__).resolve().parent / "container_assets" / "test_django" / "Containerfile" + + +def build_test_django_container_context_path() -> Path: + """Return Podman build context for the reusable local Django runtime image.""" + return build_test_django_containerfile_path().parent + + +def build_test_django_local_url(deployment: Deployment) -> str: + """Return published sentinel URL for a local Django test deployment.""" + return f"http://127.0.0.1:{deployment.guest_port}/sentinel/" + + +def build_test_django_container_names(deployment: Deployment) -> tuple[str, str]: + """Return deterministic Podman container names for server and migrate steps.""" + deployment_suffix = deployment.id.hex[:12] + tenant_slug = deployment.hosted_site.tenant.slug + site_slug = deployment.hosted_site.slug + return ( + _build_limited_identifier( + prefix="django", + tenant_slug=tenant_slug, + site_slug=site_slug, + suffix=deployment_suffix, + max_length=128, + ), + _build_limited_identifier( + prefix="django-migrate", + tenant_slug=tenant_slug, + site_slug=site_slug, + suffix=deployment_suffix, + max_length=128, + ), + ) + + +def build_test_django_container_labels(deployment: Deployment) -> tuple[tuple[str, str], ...]: + """Return stable labels to simplify inspection and cleanup.""" 
+ return ( + ("tussilago.deployment-id", str(deployment.id)), + ("tussilago.tenant-slug", deployment.hosted_site.tenant.slug), + ("tussilago.site-slug", deployment.hosted_site.slug), + ("tussilago.role", "django"), + ) + + +def build_test_django_environment( + deployment: Deployment, + runtime_services: Iterable[RuntimeService], +) -> tuple[tuple[str, str], ...]: + """Return container environment variables for the generated Django test app. + + Raises: + ValueError: If PostgreSQL or Redis runtime services are missing. + """ + postgres_service = _get_runtime_service(runtime_services, RuntimeServiceKind.POSTGRESQL.value) + redis_service = _get_runtime_service(runtime_services, RuntimeServiceKind.REDIS.value) + if not postgres_service.connection_database or not postgres_service.connection_username: + msg = "PostgreSQL runtime service is missing connection credentials." + raise ValueError(msg) + + return ( + ("DJANGO_SECRET_KEY", f"test-deployment-{deployment.id.hex}"), + ("DJANGO_SETTINGS_MODULE", "tenant_site.settings"), + ("PYTHONPATH", TEST_DJANGO_WORKDIR), + ("TEST_TENANT_SLUG", deployment.hosted_site.tenant.slug), + ("TEST_SITE_SLUG", deployment.hosted_site.slug), + ("TEST_POSTGRES_HOST", "127.0.0.1"), + ("TEST_POSTGRES_PORT", str(postgres_service.internal_port)), + ("TEST_POSTGRES_DATABASE", postgres_service.connection_database), + ("TEST_POSTGRES_USERNAME", postgres_service.connection_username), + ("TEST_POSTGRES_PASSWORD_FILE", TEST_POSTGRES_PASSWORD_FILE), + ("TEST_REDIS_HOST", "127.0.0.1"), + ("TEST_REDIS_PORT", str(redis_service.internal_port)), + ("TEST_REDIS_PASSWORD_FILE", TEST_REDIS_PASSWORD_FILE), + ) + + +def build_test_django_secret_mounts( + runtime_services: Iterable[RuntimeService], +) -> tuple[tuple[Path, str], ...]: + """Return host-to-container secret mounts for generated Django test apps.""" + postgres_service = _get_runtime_service(runtime_services, RuntimeServiceKind.POSTGRESQL.value) + redis_service = _get_runtime_service(runtime_services, 
RuntimeServiceKind.REDIS.value) + return ( + (_runtime_service_secret_directory(postgres_service), TEST_POSTGRES_AUTH_DIR), + (_runtime_service_secret_directory(redis_service), TEST_REDIS_AUTH_DIR), + ) + + +def write_test_django_project( + deployment: Deployment, + runtime_services: Iterable[RuntimeService], +) -> Path: + """Write deterministic Django project files for one deployment. + + Returns: + Root directory containing the generated Django project. + """ + build_test_django_environment(deployment, runtime_services) + + project_root = build_test_django_project_root(deployment) + package_root = project_root / "tenant_site" + package_root.mkdir(parents=True, exist_ok=True) + + (project_root / "manage.py").write_text(_manage_py_contents(), encoding="utf-8") + (package_root / "__init__.py").write_text("", encoding="utf-8") + (package_root / "settings.py").write_text(_settings_contents(), encoding="utf-8") + (package_root / "urls.py").write_text(_urls_contents(), encoding="utf-8") + (package_root / "wsgi.py").write_text(_wsgi_contents(), encoding="utf-8") + return project_root + + +def _get_runtime_service( + runtime_services: Iterable[RuntimeService], + kind: str, +) -> RuntimeService: + for runtime_service in runtime_services: + if runtime_service.kind == kind: + return runtime_service + + msg = f"Missing runtime service kind: {kind}" + raise ValueError(msg) + + +def _runtime_service_secret_directory(runtime_service: RuntimeService) -> Path: + return ( + Path(settings.DATA_DIR) + / "runtime-services" + / str(runtime_service.deployment_id) + / runtime_service.kind + / "secrets" + ) + + +def _manage_py_contents() -> str: + return dedent( + """ + #!/usr/bin/env python + import os + import sys + + + def main() -> None: + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tenant_site.settings") + from django.core.management import execute_from_command_line + + execute_from_command_line(sys.argv) + + + if __name__ == "__main__": + main() + """, + ).lstrip() + + +def 
_settings_contents() -> str: + return dedent( + """ + import os + from pathlib import Path + + + BASE_DIR = Path(__file__).resolve().parent.parent + SECRET_KEY = os.environ["DJANGO_SECRET_KEY"] + DEBUG = False + ALLOWED_HOSTS = ["127.0.0.1", "localhost"] + ROOT_URLCONF = "tenant_site.urls" + WSGI_APPLICATION = "tenant_site.wsgi.application" + INSTALLED_APPS = [ + "django.contrib.contenttypes", + ] + MIDDLEWARE = [] + TIME_ZONE = "UTC" + USE_TZ = True + DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" + TEST_TENANT_SLUG = os.environ["TEST_TENANT_SLUG"] + TEST_SITE_SLUG = os.environ["TEST_SITE_SLUG"] + TEST_REDIS_HOST = os.environ["TEST_REDIS_HOST"] + TEST_REDIS_PORT = int(os.environ["TEST_REDIS_PORT"]) + + + def _read_secret(env_key: str) -> str: + return Path(os.environ[env_key]).read_text(encoding="utf-8").strip() + + + DATABASES = { + "default": { + "ENGINE": "django.db.backends.postgresql", + "NAME": os.environ["TEST_POSTGRES_DATABASE"], + "USER": os.environ["TEST_POSTGRES_USERNAME"], + "PASSWORD": _read_secret("TEST_POSTGRES_PASSWORD_FILE"), + "HOST": os.environ["TEST_POSTGRES_HOST"], + "PORT": int(os.environ["TEST_POSTGRES_PORT"]), + }, + } + TEST_REDIS_PASSWORD = _read_secret("TEST_REDIS_PASSWORD_FILE") + """, + ).lstrip() + + +def _urls_contents() -> str: + return dedent( + """ + import redis + + from django.conf import settings + from django.db import connection + from django.http import JsonResponse + from django.urls import path + + + def sentinel_view(request): + with connection.cursor() as cursor: + cursor.execute("SELECT 1") + postgres_value = int(cursor.fetchone()[0]) + + redis_key = f"sentinel:{settings.TEST_TENANT_SLUG}:{settings.TEST_SITE_SLUG}" + redis_client = redis.Redis( + host=settings.TEST_REDIS_HOST, + port=settings.TEST_REDIS_PORT, + password=settings.TEST_REDIS_PASSWORD, + decode_responses=True, + socket_timeout=1, + ) + redis_client.set(redis_key, settings.TEST_SITE_SLUG, ex=60) + redis_value = redis_client.get(redis_key) + return 
JsonResponse( + { + "status": "ok", + "postgres": postgres_value, + "redis": redis_value, + "tenant": settings.TEST_TENANT_SLUG, + "site": settings.TEST_SITE_SLUG, + }, + ) + + + urlpatterns = [ + path("sentinel/", sentinel_view), + ] + """, + ).lstrip() + + +def _wsgi_contents() -> str: + return dedent( + """ + import os + + from django.core.wsgi import get_wsgi_application + + + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tenant_site.settings") + + application = get_wsgi_application() + """, + ).lstrip() diff --git a/control_plane/management/__init__.py b/control_plane/management/__init__.py new file mode 100644 index 0000000..78bdb05 --- /dev/null +++ b/control_plane/management/__init__.py @@ -0,0 +1 @@ +"""Django management command package for control-plane workflows.""" diff --git a/control_plane/management/commands/__init__.py b/control_plane/management/commands/__init__.py new file mode 100644 index 0000000..7b75627 --- /dev/null +++ b/control_plane/management/commands/__init__.py @@ -0,0 +1 @@ +"""Management commands for local control-plane operations.""" diff --git a/control_plane/management/commands/create_test_deployment.py b/control_plane/management/commands/create_test_deployment.py new file mode 100644 index 0000000..b89981b --- /dev/null +++ b/control_plane/management/commands/create_test_deployment.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError + +from control_plane.local_test_deployment import create_test_deployment +from control_plane.local_test_deployment import provision_test_deployment +from control_plane.local_test_deployment import queue_test_deployment_provisioning + +if TYPE_CHECKING: + from argparse import ArgumentParser + + +class Command(BaseCommand): + """Create a randomized local test deployment and optionally wait for readiness.""" + + help = "Create a randomized tenant and 
provision a local test deployment inline by default." + + def add_arguments(self, parser: ArgumentParser) -> None: + """Register CLI flags for local test deployment orchestration.""" + parser.add_argument( + "--no-wait", + action="store_true", + help="Queue provisioning asynchronously and return immediately without running it inline.", + ) + + def handle(self, *_args: str, **options: bool | float) -> None: + """Create a randomized local test deployment and optionally wait for readiness. + + Raises: + CommandError: If the deployment fails or never becomes ready. + """ + created = create_test_deployment() + self.stdout.write(f"tenant_slug={created.tenant.slug}") + self.stdout.write(f"site_slug={created.hosted_site.slug}") + self.stdout.write(f"deployment_id={created.deployment.id}") + self.stdout.write(f"sentinel_url={created.sentinel_url}") + + if options["no_wait"]: + try: + task_id = queue_test_deployment_provisioning(str(created.deployment.id)) + except RuntimeError as error: + raise CommandError(str(error)) from error + + self.stdout.write(f"celery_task_id={task_id}") + self.stdout.write("status=queued") + return + + self.stdout.write("execution_mode=inline") + try: + deployment = provision_test_deployment(str(created.deployment.id)) + except (RuntimeError, TimeoutError, ValueError) as error: + raise CommandError(str(error)) from error + + self.stdout.write(f"status={deployment.status}") diff --git a/control_plane/migrations/0001_initial.py b/control_plane/migrations/0001_initial.py new file mode 100644 index 0000000..7330454 --- /dev/null +++ b/control_plane/migrations/0001_initial.py @@ -0,0 +1,289 @@ +# Generated by Django 6.0.4 on 2026-04-27 12:21 + +import uuid + +import auto_prefetch +import django.core.validators +import django.db.models.deletion +import django.db.models.manager +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + initial = True + + dependencies = [] + + operations = [ + 
migrations.CreateModel( + name="HostedSite", + fields=[ + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("slug", models.SlugField(max_length=64)), + ("display_name", models.CharField(max_length=255)), + ("working_directory", models.CharField(default=".", max_length=255)), + ("wsgi_module", models.CharField(max_length=255)), + ( + "service_port", + models.PositiveIntegerField( + default=8000, + validators=[ + django.core.validators.MinValueValidator(1024), + django.core.validators.MaxValueValidator(65535), + ], + ), + ), + ], + options={ + "ordering": ("tenant__slug", "slug"), + "abstract": False, + "base_manager_name": "prefetch_manager", + }, + managers=[ + ("objects", django.db.models.manager.Manager()), + ("prefetch_manager", django.db.models.manager.Manager()), + ], + ), + migrations.CreateModel( + name="Deployment", + fields=[ + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("idempotency_key", models.CharField(max_length=64, unique=True)), + ("source_sha256", models.CharField(max_length=64)), + ( + "status", + models.CharField( + choices=[ + ("queued", "Queued"), + ("provisioning", "Provisioning"), + ("booting", "Booting"), + ("running", "Running"), + ("failed", "Failed"), + ("stopped", "Stopped"), + ("destroying", "Destroying"), + ("destroyed", "Destroyed"), + ], + default="queued", + max_length=32, + ), + ), + ( + "guest_ipv4", + models.GenericIPAddressField( + blank=True, + null=True, + protocol="IPv4", + ), + ), + ( + "guest_port", + models.PositiveIntegerField( + default=8000, + validators=[ + django.core.validators.MinValueValidator(1024), + 
django.core.validators.MaxValueValidator(65535), + ], + ), + ), + ( + "firecracker_vm_id", + models.CharField(blank=True, max_length=64, null=True, unique=True), + ), + ("last_error", models.TextField(blank=True)), + ("started_at", models.DateTimeField(blank=True, null=True)), + ("finished_at", models.DateTimeField(blank=True, null=True)), + ( + "hosted_site", + auto_prefetch.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="deployments", + to="control_plane.hostedsite", + ), + ), + ], + options={ + "ordering": ("-created_at",), + "abstract": False, + "base_manager_name": "prefetch_manager", + }, + managers=[ + ("objects", django.db.models.manager.Manager()), + ("prefetch_manager", django.db.models.manager.Manager()), + ], + ), + migrations.CreateModel( + name="RuntimeService", + fields=[ + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ( + "kind", + models.CharField( + choices=[("postgresql", "PostgreSQL"), ("redis", "Redis")], + max_length=32, + ), + ), + ( + "status", + models.CharField( + choices=[ + ("queued", "Queued"), + ("provisioning", "Provisioning"), + ("ready", "Ready"), + ("failed", "Failed"), + ("destroying", "Destroying"), + ("destroyed", "Destroyed"), + ], + default="queued", + max_length=32, + ), + ), + ("container_name", models.CharField(max_length=128, unique=True)), + ("network_name", models.CharField(max_length=128)), + ("hostname", models.CharField(max_length=128)), + ("image_reference", models.CharField(max_length=255)), + ( + "internal_port", + models.PositiveIntegerField( + validators=[ + django.core.validators.MinValueValidator(1), + django.core.validators.MaxValueValidator(65535), + ], + ), + ), + ("connection_username", models.CharField(blank=True, max_length=63)), + ("connection_database", models.CharField(blank=True, 
max_length=63)), + ("connection_secret_ref", models.CharField(max_length=255)), + ( + "deployment", + auto_prefetch.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="runtime_services", + to="control_plane.deployment", + ), + ), + ], + options={ + "ordering": ("deployment__created_at", "kind"), + "abstract": False, + "base_manager_name": "prefetch_manager", + }, + managers=[ + ("objects", django.db.models.manager.Manager()), + ("prefetch_manager", django.db.models.manager.Manager()), + ], + ), + migrations.CreateModel( + name="Tenant", + fields=[ + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ("slug", models.SlugField(max_length=64, unique=True)), + ("display_name", models.CharField(max_length=255)), + ], + options={ + "ordering": ("slug",), + "abstract": False, + "base_manager_name": "prefetch_manager", + "indexes": [models.Index(fields=["slug"], name="tenant_slug_idx")], + }, + managers=[ + ("objects", django.db.models.manager.Manager()), + ("prefetch_manager", django.db.models.manager.Manager()), + ], + ), + migrations.AddField( + model_name="hostedsite", + name="tenant", + field=auto_prefetch.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="hosted_sites", + to="control_plane.tenant", + ), + ), + migrations.AddIndex( + model_name="deployment", + index=models.Index( + fields=["hosted_site", "status"], + name="deploy_site_status_idx", + ), + ), + migrations.AddIndex( + model_name="deployment", + index=models.Index( + fields=["status", "created_at"], + name="deploy_status_created_idx", + ), + ), + migrations.AddIndex( + model_name="runtimeservice", + index=models.Index( + fields=["deployment", "kind"], + name="service_deploy_kind_idx", + ), + ), + migrations.AddIndex( + model_name="runtimeservice", + index=models.Index( + 
fields=["kind", "status"], + name="service_kind_status_idx", + ), + ), + migrations.AddConstraint( + model_name="runtimeservice", + constraint=models.UniqueConstraint( + fields=("deployment", "kind"), + name="runtime_service_unique_deployment_kind", + ), + ), + migrations.AddIndex( + model_name="hostedsite", + index=models.Index(fields=["tenant", "slug"], name="site_tenant_slug_idx"), + ), + migrations.AddConstraint( + model_name="hostedsite", + constraint=models.UniqueConstraint( + fields=("tenant", "slug"), + name="hosted_site_unique_tenant_slug", + ), + ), + ] diff --git a/control_plane/migrations/__init__.py b/control_plane/migrations/__init__.py new file mode 100644 index 0000000..5f259fa --- /dev/null +++ b/control_plane/migrations/__init__.py @@ -0,0 +1 @@ +"""Migration package for control-plane models.""" diff --git a/control_plane/models.py b/control_plane/models.py new file mode 100644 index 0000000..b239927 --- /dev/null +++ b/control_plane/models.py @@ -0,0 +1,340 @@ +from __future__ import annotations + +import uuid +from dataclasses import dataclass + +import auto_prefetch +from auto_prefetch import ForeignKey +from auto_prefetch import Manager +from django.core.validators import MaxValueValidator +from django.core.validators import MinValueValidator +from django.db import models +from django.db import transaction + +from control_plane.runtime_plans import DjangoApplicationLaunchConfig +from control_plane.runtime_plans import build_django_server_command + + +class TimestampedModel(auto_prefetch.Model): + """Provide created and updated timestamps for control-plane records.""" + + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta(auto_prefetch.Model.Meta): + abstract = True + + +class DeploymentStatus(models.TextChoices): + """Track deployment lifecycle state inside control plane.""" + + QUEUED = "queued", "Queued" + PROVISIONING = "provisioning", "Provisioning" + BOOTING = "booting", 
"Booting" + RUNNING = "running", "Running" + FAILED = "failed", "Failed" + STOPPED = "stopped", "Stopped" + DESTROYING = "destroying", "Destroying" + DESTROYED = "destroyed", "Destroyed" + + +class RuntimeServiceKind(models.TextChoices): + """Enumerate deployment-scoped backing services.""" + + POSTGRESQL = "postgresql", "PostgreSQL" + REDIS = "redis", "Redis" + + +class RuntimeServiceStatus(models.TextChoices): + """Track lifecycle state for a deployment-scoped service.""" + + QUEUED = "queued", "Queued" + PROVISIONING = "provisioning", "Provisioning" + READY = "ready", "Ready" + FAILED = "failed", "Failed" + DESTROYING = "destroying", "Destroying" + DESTROYED = "destroyed", "Destroyed" + + +@dataclass(frozen=True, slots=True) +class RuntimeServiceSeedSpec: + """Describe default values for admin-seeded test runtime services.""" + + hostname: str + image_reference: str + internal_port: int + + +RUNTIME_SERVICE_SEED_SPECS: dict[RuntimeServiceKind, RuntimeServiceSeedSpec] = { + RuntimeServiceKind.POSTGRESQL: RuntimeServiceSeedSpec( + hostname="postgres.internal", + image_reference="docker.io/library/postgres:17-alpine", + internal_port=5432, + ), + RuntimeServiceKind.REDIS: RuntimeServiceSeedSpec( + hostname="redis.internal", + image_reference="docker.io/library/redis:7.4-alpine", + internal_port=6379, + ), +} + + +def _build_limited_identifier( + *, + prefix: str, + tenant_slug: str, + site_slug: str, + suffix: str, + max_length: int, +) -> str: + """Build a bounded identifier while preserving deployment uniqueness. + + Args: + prefix: Static prefix to identify the type of resource (e.g. "net" or + "postgres"). + tenant_slug: Hosted site tenant slug to include in the name for uniqueness. + site_slug: Hosted site slug to include in the name for uniqueness. + suffix: Unique suffix to ensure no collisions across deployments of the same site. + max_length: Maximum length for the resulting identifier. 
+ + Returns: + A string that combines the prefix, tenant slug, site slug, and suffix, + truncated as needed to fit within max_length. + """ + candidate = f"{prefix}-{tenant_slug}-{site_slug}-{suffix}" + if len(candidate) <= max_length: + return candidate + + min_length = len(prefix) + len(suffix) + 2 + if min_length >= max_length: + return f"{prefix}-{suffix}"[:max_length] + + remaining_length = max_length - len(prefix) - len(suffix) - 3 + tenant_budget = max(1, remaining_length // 2) + site_budget = max(1, remaining_length - tenant_budget) + return "-".join( + ( + prefix, + tenant_slug[:tenant_budget], + site_slug[:site_budget], + suffix, + ), + ) + + +def _build_limited_connection_name(*, site_slug: str, suffix: str, max_length: int = 63) -> str: + """Build a bounded database identifier that stays unique per deployment. + + Args: + site_slug: Hosted site slug to include in the name for uniqueness. + suffix: Unique suffix to ensure no collisions across deployments of the same site. + max_length: Maximum length for the resulting identifier, defaulting to 63 for database compatibility + + Returns: + A string that combines the site slug and suffix, truncated as needed to fit within max_length. 
+ """ + candidate = f"{site_slug}-{suffix}" + if len(candidate) <= max_length: + return candidate + + min_length = len(suffix) + 1 + if min_length >= max_length: + return suffix[:max_length] + + site_budget = max_length - len(suffix) - 1 + return f"{site_slug[:site_budget]}-{suffix}" + + +class Tenant(TimestampedModel): + """Represent a tenant that owns hosted applications and deployments.""" + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + slug = models.SlugField(max_length=64, unique=True) + display_name = models.CharField(max_length=255) + + objects = Manager() + + class Meta(TimestampedModel.Meta): + ordering = ("slug",) + indexes = [models.Index(fields=("slug",), name="tenant_slug_idx")] + + def __str__(self) -> str: + return self.display_name + + +class HostedSite(TimestampedModel): + """Describe a deployable Django site owned by a tenant.""" + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + tenant = ForeignKey(Tenant, on_delete=models.CASCADE, related_name="hosted_sites") + slug = models.SlugField(max_length=64) + display_name = models.CharField(max_length=255) + working_directory = models.CharField(max_length=255, default=".") + wsgi_module = models.CharField(max_length=255) + service_port = models.PositiveIntegerField( + default=8000, + validators=[MinValueValidator(1024), MaxValueValidator(65535)], + ) + + objects = Manager() + + class Meta(TimestampedModel.Meta): + ordering = ("tenant__slug", "slug") + constraints = [ + models.UniqueConstraint( + fields=("tenant", "slug"), + name="hosted_site_unique_tenant_slug", + ), + ] + indexes = [ + models.Index(fields=("tenant", "slug"), name="site_tenant_slug_idx"), + ] + + def __str__(self) -> str: + return f"{self.tenant.slug}/{self.slug}" + + +class Deployment(TimestampedModel): + """Track a single deployable runtime instance for a hosted site.""" + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + hosted_site = 
ForeignKey(HostedSite, on_delete=models.CASCADE, related_name="deployments") + idempotency_key = models.CharField(max_length=64, unique=True) + source_sha256 = models.CharField(max_length=64) + status = models.CharField( + max_length=32, + choices=DeploymentStatus, + default=DeploymentStatus.QUEUED, + ) + guest_ipv4 = models.GenericIPAddressField(protocol="IPv4", blank=True, null=True) + guest_port = models.PositiveIntegerField( + default=8000, + validators=[MinValueValidator(1024), MaxValueValidator(65535)], + ) + firecracker_vm_id = models.CharField(max_length=64, blank=True, null=True, unique=True) + last_error = models.TextField(blank=True) + started_at = models.DateTimeField(blank=True, null=True) + finished_at = models.DateTimeField(blank=True, null=True) + + objects = Manager() + + class Meta(TimestampedModel.Meta): + ordering = ("-created_at",) + indexes = [ + models.Index(fields=("hosted_site", "status"), name="deploy_site_status_idx"), + models.Index(fields=("status", "created_at"), name="deploy_status_created_idx"), + ] + + def __str__(self) -> str: + return f"{self.hosted_site} [{self.status}]" + + def build_django_launch_command(self) -> tuple[str, ...]: + """Build a uv-driven Gunicorn command for this deployment's Django app. + + Returns: + Tuple of command arguments ready for subprocess execution inside a guest VM. + """ + config = DjangoApplicationLaunchConfig( + wsgi_module=self.hosted_site.wsgi_module, + bind_host="0.0.0.0", # noqa: S104 + port=self.guest_port, + ) + return build_django_server_command(config) + + def ensure_test_runtime_services(self) -> tuple[RuntimeService, ...]: + """Create missing test runtime services for all supported service kinds. + + Returns: + Newly created runtime service records. 
+ """ + tenant_slug = self.hosted_site.tenant.slug + site_slug = self.hosted_site.slug + deployment_suffix = self.id.hex[:12] + network_name = _build_limited_identifier( + prefix="net", + tenant_slug=tenant_slug, + site_slug=site_slug, + suffix=deployment_suffix, + max_length=128, + ) + connection_name = _build_limited_connection_name( + site_slug=site_slug, + suffix=deployment_suffix, + ) + created_services: list[RuntimeService] = [] + + with transaction.atomic(): + existing_kinds = set( + RuntimeService.objects.filter(deployment=self).values_list("kind", flat=True), + ) + for kind, seed_spec in RUNTIME_SERVICE_SEED_SPECS.items(): + if kind.value in existing_kinds: + continue + + created_services.append( + RuntimeService( + deployment=self, + kind=kind.value, + status=RuntimeServiceStatus.QUEUED.value, + container_name=_build_limited_identifier( + prefix=kind.value, + tenant_slug=tenant_slug, + site_slug=site_slug, + suffix=deployment_suffix, + max_length=128, + ), + network_name=network_name, + hostname=seed_spec.hostname, + image_reference=seed_spec.image_reference, + internal_port=seed_spec.internal_port, + connection_username=connection_name if kind == RuntimeServiceKind.POSTGRESQL else "", + connection_database=connection_name if kind == RuntimeServiceKind.POSTGRESQL else "", + connection_secret_ref=(f"secret://{kind.value}/{tenant_slug}/{site_slug}/{deployment_suffix}"), + ), + ) + + if created_services: + RuntimeService.objects.bulk_create(created_services) + + return tuple(created_services) + + +class RuntimeService(TimestampedModel): + """Track a dedicated PostgreSQL or Redis service for one deployment.""" + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + deployment = ForeignKey(Deployment, on_delete=models.CASCADE, related_name="runtime_services") + kind = models.CharField(max_length=32, choices=RuntimeServiceKind) + status = models.CharField( + max_length=32, + choices=RuntimeServiceStatus, + 
default=RuntimeServiceStatus.QUEUED, + ) + container_name = models.CharField(max_length=128, unique=True) + network_name = models.CharField(max_length=128) + hostname = models.CharField(max_length=128) + image_reference = models.CharField(max_length=255) + internal_port = models.PositiveIntegerField( + validators=[MinValueValidator(1), MaxValueValidator(65535)], + ) + connection_username = models.CharField(max_length=63, blank=True) + connection_database = models.CharField(max_length=63, blank=True) + connection_secret_ref = models.CharField(max_length=255) + + objects = Manager() + + class Meta(TimestampedModel.Meta): + ordering = ("deployment__created_at", "kind") + constraints = [ + models.UniqueConstraint( + fields=("deployment", "kind"), + name="runtime_service_unique_deployment_kind", + ), + ] + indexes = [ + models.Index(fields=("deployment", "kind"), name="service_deploy_kind_idx"), + models.Index(fields=("kind", "status"), name="service_kind_status_idx"), + ] + + def __str__(self) -> str: + return f"{self.deployment_id}:{self.kind}" diff --git a/control_plane/observability.py b/control_plane/observability.py new file mode 100644 index 0000000..a2f2d99 --- /dev/null +++ b/control_plane/observability.py @@ -0,0 +1,254 @@ +from __future__ import annotations + +import json +from typing import TYPE_CHECKING +from urllib.error import HTTPError +from urllib.error import URLError +from urllib.request import urlopen + +from django.utils import timezone + +from control_plane.host_commands import HostCommandError +from control_plane.host_commands import run_host_command +from control_plane.local_test_runtime import build_test_django_container_names +from control_plane.local_test_runtime import build_test_django_local_url +from control_plane.local_test_runtime import build_test_django_project_root +from control_plane.models import Deployment +from control_plane.models import DeploymentStatus + +if TYPE_CHECKING: + from collections.abc import Iterable + from pathlib 
import Path + + from control_plane.models import RuntimeService + + +MAX_DIAGNOSTIC_LOG_LINES = 200 +DEFAULT_SENTINEL_PROBE_TIMEOUT_SECONDS = 2.0 + +type JsonPrimitive = bool | int | float | str | None +type JsonValue = JsonPrimitive | list[JsonValue] | dict[str, JsonValue] + + +def build_test_deployment_diagnostics_root(deployment: Deployment) -> Path: + """Return filesystem root for persisted deployment diagnostics.""" + return build_test_django_project_root(deployment).parent / "diagnostics" + + +def build_test_deployment_diagnostics_snapshot_path(deployment: Deployment) -> Path: + """Return JSON snapshot path for one deployment's latest diagnostics.""" + return build_test_deployment_diagnostics_root(deployment) / "snapshot.json" + + +def capture_test_deployment_diagnostics(deployment_id: str) -> None: + """Capture current pod, container, and log state for one deployment.""" + deployment = ( + Deployment.objects + .select_related("hosted_site__tenant") + .prefetch_related("runtime_services") + .get(pk=deployment_id) + ) + snapshot_path = build_test_deployment_diagnostics_snapshot_path(deployment) + snapshot_path.parent.mkdir(parents=True, exist_ok=True) + snapshot_path.write_text( + json.dumps(_build_diagnostics_snapshot(deployment), indent=2), + encoding="utf-8", + ) + + +def load_test_deployment_diagnostics(deployment: Deployment) -> dict[str, JsonValue] | None: + """Load the latest persisted diagnostics snapshot for one deployment. + + Returns: + Parsed diagnostics payload, or None when no snapshot has been captured yet. 
+ """ + snapshot_path = build_test_deployment_diagnostics_snapshot_path(deployment) + if not snapshot_path.exists(): + return None + + try: + payload = json.loads(snapshot_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as error: + return { + "capture_error": f"Unable to parse diagnostics snapshot: {error}", + "captured_at": None, + } + + if not isinstance(payload, dict): + return { + "capture_error": "Diagnostics snapshot is not a JSON object.", + "captured_at": None, + } + + return payload + + +def probe_test_deployment_health( + deployment: Deployment, + *, + timeout_seconds: float = DEFAULT_SENTINEL_PROBE_TIMEOUT_SECONDS, +) -> dict[str, JsonValue]: + """Probe the generated deployment sentinel endpoint and return structured status. + + Returns: + JSON-serializable probe state describing current sentinel reachability and payload. + """ + sentinel_url = build_test_django_local_url(deployment) + result: dict[str, JsonValue] = { + "checked_at": timezone.now().isoformat(), + "deployment_id": str(deployment.id), + "deployment_status": deployment.status, + "sentinel_url": sentinel_url, + "ok": False, + "status": "not-running", + "label": "Not Running", + "payload": None, + "error": "", + "http_status": None, + } + if deployment.status not in {DeploymentStatus.RUNNING.value, DeploymentStatus.BOOTING.value}: + return result + + try: + with urlopen(sentinel_url, timeout=timeout_seconds) as response: # noqa: S310 + payload = json.loads(response.read().decode("utf-8")) + result["http_status"] = int(getattr(response, "status", 200)) + if isinstance(payload, dict): + result["payload"] = payload + if payload.get("status") == "ok": + result["ok"] = True + result["status"] = "healthy" + result["label"] = "Healthy" + else: + result["status"] = "unexpected-payload" + result["label"] = "Unexpected" + else: + result["payload"] = {"value": str(payload)} + result["status"] = "unexpected-payload" + result["label"] = "Unexpected" + except (HTTPError, URLError, OSError, 
json.JSONDecodeError) as error: + result["status"] = "unreachable" + result["label"] = "Unreachable" + result["error"] = str(error) + + return result + + +def _build_diagnostics_snapshot(deployment: Deployment) -> dict[str, JsonValue]: + runtime_services = tuple(_ordered_runtime_services(deployment.runtime_services.all())) + server_container_name, _ = build_test_django_container_names(deployment) + pod_name = runtime_services[0].network_name if runtime_services else "" + + return { + "captured_at": timezone.now().isoformat(), + "deployment_id": str(deployment.id), + "deployment_status": deployment.status, + "tenant_slug": deployment.hosted_site.tenant.slug, + "site_slug": deployment.hosted_site.slug, + "guest_port": deployment.guest_port, + "sentinel_url": build_test_django_local_url(deployment), + "last_error": deployment.last_error, + "pod": _collect_pod_diagnostics(pod_name), + "django": _collect_container_diagnostics( + container_name=server_container_name, + control_plane_status=deployment.status, + label="django", + ), + "runtime_services": [ + _collect_container_diagnostics( + container_name=runtime_service.container_name, + control_plane_status=runtime_service.status, + label=runtime_service.kind, + ) + for runtime_service in runtime_services + ], + } + + +def _ordered_runtime_services(runtime_services: Iterable[RuntimeService]) -> tuple[RuntimeService, ...]: + return tuple(sorted(runtime_services, key=lambda runtime_service: runtime_service.kind)) + + +def _collect_pod_diagnostics(pod_name: str) -> dict[str, JsonValue]: + if not pod_name: + return { + "name": "", + "status": "missing", + "error": "No runtime services are linked to this deployment yet.", + } + + try: + result = run_host_command( + command=("podman", "pod", "inspect", "--format", "{{.State}}", pod_name), + timeout_seconds=20.0, + ) + except HostCommandError as error: + return { + "name": pod_name, + "status": "missing", + "error": _format_host_command_error(error), + } + + return { + "name": 
pod_name, + "status": result.stdout.strip() or "unknown", + "error": "", + } + + +def _collect_container_diagnostics( + *, + container_name: str, + control_plane_status: str, + label: str, +) -> dict[str, JsonValue]: + container_status, inspect_error = _inspect_container_status(container_name) + logs, log_error = _read_container_logs(container_name) + return { + "label": label, + "container_name": container_name, + "control_plane_status": control_plane_status, + "container_status": container_status, + "logs": logs, + "inspect_error": inspect_error, + "log_error": log_error, + } + + +def _inspect_container_status(container_name: str) -> tuple[str, str]: + try: + result = run_host_command( + command=( + "podman", + "inspect", + "--format", + "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}", + container_name, + ), + timeout_seconds=20.0, + ) + except HostCommandError as error: + return "missing", _format_host_command_error(error) + + return result.stdout.strip() or "unknown", "" + + +def _read_container_logs(container_name: str) -> tuple[str, str]: + try: + result = run_host_command( + command=("podman", "logs", "--tail", str(MAX_DIAGNOSTIC_LOG_LINES), container_name), + timeout_seconds=20.0, + ) + except HostCommandError as error: + return "", _format_host_command_error(error) + + output = result.stdout.strip() or result.stderr.strip() + return output, "" + + +def _format_host_command_error(error: HostCommandError) -> str: + if error.stderr.strip(): + return error.stderr.strip() + if error.stdout.strip(): + return error.stdout.strip() + return str(error) diff --git a/control_plane/runtime_plans.py b/control_plane/runtime_plans.py new file mode 100644 index 0000000..49aaff7 --- /dev/null +++ b/control_plane/runtime_plans.py @@ -0,0 +1,366 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Sequence + from pathlib import Path + + 
+@dataclass(frozen=True, slots=True) +class PostgresContainerConfig: + """Input required to build a Podman command for a tenant PostgreSQL service.""" + + container_name: str + network_name: str + hostname: str + username: str + database_name: str + data_directory: Path + password_file: Path + pod_name: str | None = None + image_reference: str = "docker.io/library/postgres:17-alpine" + memory_limit_mib: int = 512 + cpu_limit: float = 1.0 + + +@dataclass(frozen=True, slots=True) +class RedisContainerConfig: + """Input required to build a Podman command for a tenant Redis service.""" + + container_name: str + network_name: str + hostname: str + data_directory: Path + password_file: Path + pod_name: str | None = None + image_reference: str = "docker.io/library/redis:7.4-alpine" + memory_limit_mib: int = 256 + cpu_limit: float = 0.5 + + +@dataclass(frozen=True, slots=True) +class DjangoApplicationLaunchConfig: + """Input required to build a uv-driven Gunicorn command for a Django app.""" + + wsgi_module: str + port: int = 8000 + bind_host: str = "127.0.0.1" + workers: int = 2 + python_executable: Path | None = None + uv_project_path: Path | None = None + + +@dataclass(frozen=True, slots=True) +class DjangoContainerImageBuildConfig: + """Input required to build the reusable local Django test image.""" + + image_reference: str + containerfile_path: Path + context_directory: Path + + +@dataclass(frozen=True, slots=True) +class DjangoContainerRuntimeConfig: + """Input required to run a local Django test container with Podman.""" + + container_name: str + network_name: str + hostname: str + image_reference: str + application_directory: Path + pod_name: str | None = None + host_port: int | None = None + guest_port: int = 8000 + working_directory: str = "/srv/test-app" + environment: tuple[tuple[str, str], ...] = () + secret_mounts: tuple[tuple[Path, str], ...] = () + labels: tuple[tuple[str, str], ...] 
= () + memory_limit_mib: int = 256 + cpu_limit: float = 1.0 + + +def build_postgres_container_command( + config: PostgresContainerConfig, +) -> tuple[str, ...]: + """Build a hardened Podman command for a deployment-scoped PostgreSQL service. + + Returns: + Tuple of Podman arguments ready for subprocess execution. + """ + command = [ + "podman", + "run", + "--detach", + "--replace", + "--name", + config.container_name, + ] + if config.pod_name is None: + command.extend(("--network", config.network_name, "--hostname", config.hostname)) + else: + command.extend(("--pod", config.pod_name)) + + command.extend( + [ + "--cap-drop=all", + "--cap-add=CHOWN", + "--cap-add=FOWNER", + "--cap-add=SETUID", + "--cap-add=SETGID", + "--cap-add=DAC_OVERRIDE", + "--security-opt=no-new-privileges", + "--pids-limit=256", + "--memory", + f"{config.memory_limit_mib}m", + "--cpus", + str(config.cpu_limit), + "--read-only", + "--tmpfs", + "/tmp:rw,nosuid,nodev,noexec,size=64m", # noqa: S108 + "--tmpfs", + "/var/run/postgresql:rw,nosuid,nodev,noexec,size=16m", + "--volume", + f"{config.data_directory}:/var/lib/postgresql/data:Z,rw", + "--volume", + f"{config.password_file}:/run/secrets/postgres-password:Z,ro", + "--env", + f"POSTGRES_USER={config.username}", + "--env", + f"POSTGRES_DB={config.database_name}", + "--env", + "POSTGRES_PASSWORD_FILE=/run/secrets/postgres-password", + "--health-cmd", + f"pg_isready -U {config.username} -d {config.database_name}", + "--health-interval", + "10s", + "--health-retries", + "5", + config.image_reference, + "postgres", + "-c", + "listen_addresses=*", + "-c", + "password_encryption=scram-sha-256", + ], + ) + return tuple(command) + + +def build_redis_container_command(config: RedisContainerConfig) -> tuple[str, ...]: + """Build a hardened Podman command for a deployment-scoped Redis service. + + Returns: + Tuple of Podman arguments ready for subprocess execution. 
+ """ + command = [ + "podman", + "run", + "--detach", + "--replace", + "--name", + config.container_name, + ] + if config.pod_name is None: + command.extend(("--network", config.network_name, "--hostname", config.hostname)) + else: + command.extend(("--pod", config.pod_name)) + + command.extend( + [ + "--cap-drop=all", + "--security-opt=no-new-privileges", + "--pids-limit=128", + "--memory", + f"{config.memory_limit_mib}m", + "--cpus", + str(config.cpu_limit), + "--read-only", + "--tmpfs", + "/tmp:rw,nosuid,nodev,noexec,size=32m", # noqa: S108 + "--volume", + f"{config.data_directory}:/data:Z,rw", + "--volume", + f"{config.password_file}:/run/secrets/redis-password:Z,ro", + "--health-cmd", + "sh -eu -c 'redis-cli --no-auth-warning -a \"$(cat /run/secrets/redis-password)\" ping'", + "--health-interval", + "10s", + "--health-retries", + "5", + config.image_reference, + "sh", + "-eu", + "-c", + 'redis_password=$(cat /run/secrets/redis-password) && exec redis-server --appendonly yes --protected-mode yes --requirepass "${redis_password}"', + ], + ) + return tuple(command) + + +def build_django_server_command( + config: DjangoApplicationLaunchConfig, +) -> tuple[str, ...]: + """Build a uv-driven Gunicorn command for a hosted Django deployment. + + Returns: + Tuple of command arguments ready for subprocess execution. + + Raises: + ValueError: If both direct-python and uv-project execution modes are requested. 
+ """ + if config.python_executable is not None and config.uv_project_path is not None: + msg = "python_executable and uv_project_path are mutually exclusive" + raise ValueError(msg) + + if config.python_executable is not None: + command = [str(config.python_executable), "-m", "gunicorn"] + else: + command = ["uv", "run"] + if config.uv_project_path is not None: + command.extend(["--project", str(config.uv_project_path)]) + + command.append("gunicorn") + + command.extend( + [ + "--bind", + f"{config.bind_host}:{config.port}", + "--workers", + str(config.workers), + "--access-logfile", + "-", + "--error-logfile", + "-", + "--capture-output", + "--graceful-timeout", + "30", + "--timeout", + "60", + config.wsgi_module, + ], + ) + return tuple(command) + + +def build_django_container_image_command( + config: DjangoContainerImageBuildConfig, +) -> tuple[str, ...]: + """Build a Podman image command for the reusable Django test runtime. + + Returns: + Tuple of Podman arguments ready for subprocess execution. + """ + return ( + "podman", + "build", + "--pull=missing", + "--tag", + config.image_reference, + "--file", + str(config.containerfile_path), + str(config.context_directory), + ) + + +def build_django_container_run_command( + config: DjangoContainerRuntimeConfig, + *, + command: Sequence[str], + detach: bool, + remove: bool = False, +) -> tuple[str, ...]: + """Build a hardened Podman command for a local Django test container. + + Returns: + Tuple of Podman arguments ready for subprocess execution. + + Raises: + ValueError: If the command sequence is empty. 
+ """ + if not command: + msg = "command must not be empty" + raise ValueError(msg) + + podman_command = ["podman", "run"] + if detach: + podman_command.extend(("--detach", "--replace")) + if remove: + podman_command.append("--rm") + + podman_command.extend( + [ + "--name", + config.container_name, + ], + ) + if config.pod_name is None: + podman_command.extend(("--network", config.network_name, "--hostname", config.hostname)) + else: + podman_command.extend(("--pod", config.pod_name)) + + podman_command.extend( + [ + "--workdir", + config.working_directory, + "--cap-drop=all", + "--security-opt=no-new-privileges", + "--pids-limit=256", + "--memory", + f"{config.memory_limit_mib}m", + "--cpus", + str(config.cpu_limit), + "--read-only", + "--tmpfs", + "/tmp:rw,nosuid,nodev,noexec,size=64m", # noqa: S108 + "--tmpfs", + "/run:rw,nosuid,nodev,noexec,size=16m", + "--volume", + f"{config.application_directory}:{config.working_directory}:Z,ro", + ], + ) + + if config.host_port is not None and config.pod_name is None: + podman_command.extend(("--publish", f"127.0.0.1:{config.host_port}:{config.guest_port}")) + + for mount_source, mount_target in config.secret_mounts: + podman_command.extend(("--volume", f"{mount_source}:{mount_target}:Z,ro")) + + for key, value in config.environment: + podman_command.extend(("--env", f"{key}={value}")) + + for key, value in config.labels: + podman_command.extend(("--label", f"{key}={value}")) + + podman_command.append(config.image_reference) + podman_command.extend(command) + return tuple(podman_command) + + +def build_django_migrate_command( + uv_project_path: Path | None = None, + *, + python_executable: Path | None = None, +) -> tuple[str, ...]: + """Build a uv-driven migration command for a hosted Django deployment. + + Returns: + Tuple of command arguments ready for subprocess execution. + + Raises: + ValueError: If direct-python and uv-project execution modes are mixed. 
+ """ + if python_executable is not None and uv_project_path is not None: + msg = "python_executable and uv_project_path are mutually exclusive" + raise ValueError(msg) + + if python_executable is not None: + return (str(python_executable), "manage.py", "migrate", "--noinput") + + command = ["uv", "run"] + if uv_project_path is not None: + command.extend(["--project", str(uv_project_path)]) + + command.extend(["python", "manage.py", "migrate", "--noinput"]) + return tuple(command) diff --git a/control_plane/tasks.py b/control_plane/tasks.py new file mode 100644 index 0000000..372fb72 --- /dev/null +++ b/control_plane/tasks.py @@ -0,0 +1,656 @@ +from __future__ import annotations + +import json +import logging +import secrets +import time +from pathlib import Path +from typing import TYPE_CHECKING +from typing import NoReturn +from urllib.request import urlopen + +from celery import shared_task +from django.conf import settings +from django.db import transaction +from django.utils import timezone + +from control_plane.host_commands import HostCommandError +from control_plane.host_commands import run_host_command +from control_plane.local_test_runtime import TEST_DJANGO_CONTAINER_PORT +from control_plane.local_test_runtime import TEST_DJANGO_WORKDIR +from control_plane.local_test_runtime import build_test_django_container_context_path +from control_plane.local_test_runtime import build_test_django_container_labels +from control_plane.local_test_runtime import build_test_django_container_names +from control_plane.local_test_runtime import build_test_django_containerfile_path +from control_plane.local_test_runtime import build_test_django_environment +from control_plane.local_test_runtime import build_test_django_image_reference +from control_plane.local_test_runtime import build_test_django_local_url +from control_plane.local_test_runtime import build_test_django_secret_mounts +from control_plane.local_test_runtime import write_test_django_project +from 
control_plane.models import Deployment +from control_plane.models import DeploymentStatus +from control_plane.models import RuntimeService +from control_plane.models import RuntimeServiceKind +from control_plane.models import RuntimeServiceStatus +from control_plane.observability import capture_test_deployment_diagnostics +from control_plane.runtime_plans import DjangoApplicationLaunchConfig +from control_plane.runtime_plans import DjangoContainerImageBuildConfig +from control_plane.runtime_plans import DjangoContainerRuntimeConfig +from control_plane.runtime_plans import PostgresContainerConfig +from control_plane.runtime_plans import RedisContainerConfig +from control_plane.runtime_plans import build_django_container_image_command +from control_plane.runtime_plans import build_django_container_run_command +from control_plane.runtime_plans import build_django_migrate_command +from control_plane.runtime_plans import build_django_server_command +from control_plane.runtime_plans import build_postgres_container_command +from control_plane.runtime_plans import build_redis_container_command + +if TYPE_CHECKING: + from celery.app.task import Task + + type BoundControlPlaneTask = Task[..., str] + + +logger = logging.getLogger("tussilago.control_plane.tasks") + +DEFAULT_HTTP_READY_TIMEOUT_SECONDS = 45.0 +DEFAULT_CONTAINER_READY_TIMEOUT_SECONDS = 45.0 + + +TERMINAL_DEPLOYMENT_STATES: frozenset[str] = frozenset( + { + DeploymentStatus.DESTROYED.value, + DeploymentStatus.FAILED.value, + }, +) + +TERMINAL_RUNTIME_SERVICE_STATES: frozenset[str] = frozenset( + { + RuntimeServiceStatus.DESTROYING.value, + RuntimeServiceStatus.DESTROYED.value, + }, +) + + +def _runtime_service_root(runtime_service: RuntimeService) -> Path: + """Return filesystem root for one runtime service's local test artifacts.""" + return Path(settings.DATA_DIR) / "runtime-services" / str(runtime_service.deployment_id) / runtime_service.kind + + +def _mark_deployment_failed(*, deployment_id: str, message: str) 
-> None: + """Persist failed deployment state with the latest error details.""" + with transaction.atomic(): + deployment = Deployment.objects.select_for_update().get(pk=deployment_id) + deployment.status = DeploymentStatus.FAILED.value + deployment.last_error = message + deployment.finished_at = timezone.now() + deployment.save(update_fields=["status", "last_error", "finished_at", "updated_at"]) + + +def _capture_test_deployment_diagnostics_snapshot(deployment_id: str) -> None: + """Persist best-effort diagnostics without breaking deployment flow.""" + try: + capture_test_deployment_diagnostics(deployment_id) + except OSError: + logger.exception("Failed to write diagnostics snapshot deployment_id=%s", deployment_id) + except ValueError: + logger.exception("Invalid diagnostics snapshot state deployment_id=%s", deployment_id) + except Deployment.DoesNotExist: + logger.exception("Diagnostics snapshot skipped for missing deployment_id=%s", deployment_id) + + +def _ensure_test_django_image_exists(image_reference: str) -> None: + """Build the reusable Django test image if it is missing locally. + + Raises: + HostCommandError: If Podman image inspection or build fails. 
+ """ + try: + run_host_command(command=("podman", "image", "exists", image_reference)) + except HostCommandError as error: + if error.returncode != 1: + raise + + run_host_command( + command=build_django_container_image_command( + DjangoContainerImageBuildConfig( + image_reference=image_reference, + containerfile_path=build_test_django_containerfile_path(), + context_directory=build_test_django_container_context_path(), + ), + ), + timeout_seconds=300.0, + ) + + +def _read_container_logs(container_name: str) -> str: + """Return captured container logs for failure reporting when available.""" + try: + result = run_host_command(command=("podman", "logs", container_name)) + except HostCommandError: + return "" + + return result.stdout.strip() or result.stderr.strip() + + +def _read_container_status(container_name: str) -> str: + """Return current Podman health status for one container when available.""" + result = run_host_command( + command=( + "podman", + "inspect", + "--format", + "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}", + container_name, + ), + ) + return result.stdout.strip() + + +def _wait_for_container_ready( + runtime_service: RuntimeService, + *, + timeout_seconds: float = DEFAULT_CONTAINER_READY_TIMEOUT_SECONDS, +) -> None: + """Poll Podman health state until one runtime service is ready. + + Raises: + RuntimeError: If the runtime service exits or becomes unhealthy before it is ready. + TimeoutError: If the runtime service does not become ready before timeout. + """ + deadline = time.monotonic() + timeout_seconds + while time.monotonic() < deadline: + status = _read_container_status(runtime_service.container_name) + if status == "healthy": + return + if status in {"exited", "dead", "stopped", "unhealthy"}: + logs = _read_container_logs(runtime_service.container_name) + message = f"Runtime service {runtime_service.kind} failed to become ready: {status}." 
+ if logs: + message = f"{message}\n{logs}" + raise RuntimeError(message) + + time.sleep(1.0) + + msg = f"Timed out waiting for runtime service {runtime_service.kind} to become healthy." + raise TimeoutError(msg) + + +def _wait_for_http_ready( + url: str, + *, + timeout_seconds: float = DEFAULT_HTTP_READY_TIMEOUT_SECONDS, +) -> dict[str, str | int]: + """Poll a sentinel endpoint until it confirms PostgreSQL and Redis connectivity. + + Returns: + Parsed JSON response from the sentinel endpoint. + + Raises: + TimeoutError: If the endpoint does not become healthy before timeout. + """ + deadline = time.monotonic() + timeout_seconds + last_error: Exception | None = None + while time.monotonic() < deadline: + try: + with urlopen(url, timeout=2) as response: # noqa: S310 + payload = json.loads(response.read().decode("utf-8")) + if payload.get("status") == "ok": + return payload + except (OSError, json.JSONDecodeError) as error: + last_error = error + + time.sleep(1.0) + + msg = f"Timed out waiting for healthy Django sentinel endpoint at {url}" + raise TimeoutError(msg) from last_error + + +def _build_django_runtime_services(deployment: Deployment) -> tuple[RuntimeService, ...]: + return tuple( + RuntimeService.objects + .select_related("deployment__hosted_site__tenant") + .filter(deployment=deployment) + .order_by("kind"), + ) + + +def _get_ready_django_runtime_services(deployment: Deployment) -> tuple[RuntimeService, ...]: + """Return ready runtime services required by the generated Django test app. + + Raises: + ValueError: If PostgreSQL or Redis containers are not ready. + """ + runtime_services = _build_django_runtime_services(deployment) + if not runtime_services or any( + runtime_service.status != RuntimeServiceStatus.READY.value for runtime_service in runtime_services + ): + msg = "All runtime services must be ready before provisioning the Django test runtime." 
+ raise ValueError(msg) + + return runtime_services + + +def _build_django_runtime_configs( + deployment: Deployment, + runtime_services: tuple[RuntimeService, ...], + *, + project_root: Path, +) -> tuple[str, DjangoContainerRuntimeConfig, DjangoContainerRuntimeConfig]: + """Build image reference plus migrate and server configs for one deployment. + + Returns: + Image reference plus migrate and server Podman runtime configs. + """ + image_reference = build_test_django_image_reference() + environment = build_test_django_environment(deployment, runtime_services) + secret_mounts = build_test_django_secret_mounts(runtime_services) + labels = build_test_django_container_labels(deployment) + server_container_name, migrate_container_name = build_test_django_container_names(deployment) + network_name = runtime_services[0].network_name + migrate_config = DjangoContainerRuntimeConfig( + container_name=migrate_container_name, + network_name=network_name, + hostname="django-migrate.internal", + image_reference=image_reference, + application_directory=project_root, + pod_name=network_name, + working_directory=TEST_DJANGO_WORKDIR, + environment=environment, + secret_mounts=secret_mounts, + labels=labels, + ) + server_config = DjangoContainerRuntimeConfig( + container_name=server_container_name, + network_name=network_name, + hostname="django.internal", + image_reference=image_reference, + application_directory=project_root, + pod_name=network_name, + host_port=deployment.guest_port, + guest_port=TEST_DJANGO_CONTAINER_PORT, + working_directory=TEST_DJANGO_WORKDIR, + environment=environment, + secret_mounts=secret_mounts, + labels=labels, + ) + return image_reference, migrate_config, server_config + + +def _launch_django_runtime( + deployment: Deployment, + *, + image_reference: str, + migrate_config: DjangoContainerRuntimeConfig, + server_config: DjangoContainerRuntimeConfig, +) -> dict[str, str | int]: + """Build image, run migrations, launch the Django container, and wait for 
readiness. + + Returns: + Parsed JSON sentinel payload from the running Django test app. + """ + _ensure_test_django_image_exists(image_reference) + + migrate_command = build_django_migrate_command(python_executable=Path("/usr/local/bin/python")) + run_host_command( + command=build_django_container_run_command( + migrate_config, + command=migrate_command, + detach=False, + remove=True, + ), + timeout_seconds=120.0, + ) + + server_command = build_django_server_command( + DjangoApplicationLaunchConfig( + wsgi_module=deployment.hosted_site.wsgi_module, + bind_host="0.0.0.0", # noqa: S104 + port=TEST_DJANGO_CONTAINER_PORT, + workers=1, + python_executable=Path("/usr/local/bin/python"), + ), + ) + run_host_command( + command=build_django_container_run_command( + server_config, + command=server_command, + detach=True, + ), + timeout_seconds=120.0, + ) + return _wait_for_http_ready(build_test_django_local_url(deployment)) + + +def _retry_or_fail_django_runtime( + self: BoundControlPlaneTask, + *, + deployment: Deployment, + error: HostCommandError | TimeoutError, +) -> NoReturn: + """Retry transient Django runtime failures, or mark deployment failed when retries are exhausted.""" + retries = getattr(self.request, "retries", 0) + logger.warning( + "Django runtime provisioning retry deployment_id=%s retries=%s error=%s", + deployment.id, + retries, + error, + ) + if retries >= self.max_retries: + server_container_name, _ = build_test_django_container_names(deployment) + logs = _read_container_logs(server_container_name) + failure_message = str(error) + if logs: + failure_message = f"{failure_message}\n{logs}" + _mark_deployment_failed(deployment_id=str(deployment.id), message=failure_message) + _capture_test_deployment_diagnostics_snapshot(str(deployment.id)) + logger.error("Django runtime provisioning failed deployment_id=%s", deployment.id) + raise error + + countdown = min(300, 2 ** (retries + 1)) + raise self.retry(exc=error, countdown=countdown) from error + + +def 
run_test_django_runtime_provisioning(deployment_id: str) -> str: + """Run generated Django runtime provisioning inline for one deployment. + + Returns: + Final deployment status for the processed deployment. + """ + deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id) + if deployment.status in TERMINAL_DEPLOYMENT_STATES or deployment.status == DeploymentStatus.RUNNING.value: + return deployment.status + + runtime_services = _get_ready_django_runtime_services(deployment) + project_root = write_test_django_project(deployment, runtime_services) + image_reference, migrate_config, server_config = _build_django_runtime_configs( + deployment, + runtime_services, + project_root=project_root, + ) + sentinel_payload = _launch_django_runtime( + deployment, + image_reference=image_reference, + migrate_config=migrate_config, + server_config=server_config, + ) + + with transaction.atomic(): + deployment = Deployment.objects.select_for_update().get(pk=deployment_id) + if deployment.status in TERMINAL_DEPLOYMENT_STATES: + return deployment.status + + deployment.status = DeploymentStatus.RUNNING.value + deployment.last_error = "" + deployment.started_at = timezone.now() + deployment.finished_at = None + deployment.save(update_fields=["status", "last_error", "started_at", "finished_at", "updated_at"]) + + _capture_test_deployment_diagnostics_snapshot(deployment_id) + logger.info( + "Django runtime ready deployment_id=%s tenant_slug=%s site_slug=%s postgres=%s redis=%s", + deployment_id, + deployment.hosted_site.tenant.slug, + deployment.hosted_site.slug, + sentinel_payload.get("postgres"), + sentinel_payload.get("redis"), + ) + return DeploymentStatus.RUNNING.value + + +def _ensure_secret_file(password_file: Path) -> None: + """Write a reusable password file for a test container if one does not already exist.""" + password_file.parent.mkdir(parents=True, exist_ok=True) + if password_file.exists(): + return + + 
password_file.write_text(f"{secrets.token_urlsafe(24)}\n", encoding="utf-8") + password_file.chmod(0o600) + + +def _ensure_podman_pod(*, pod_name: str, host_port: int) -> None: + """Create a Podman pod if it is missing. + + Raises: + HostCommandError: If Podman pod inspection or creation fails. + """ + try: + run_host_command(command=("podman", "pod", "exists", pod_name)) + except HostCommandError as error: + if error.returncode != 1: + raise + + run_host_command( + command=( + "podman", + "pod", + "create", + "--replace", + "--name", + pod_name, + "--publish", + f"127.0.0.1:{host_port}:{TEST_DJANGO_CONTAINER_PORT}", + ), + ) + + +def _build_runtime_service_command( + runtime_service: RuntimeService, + *, + data_directory: Path, + password_file: Path, +) -> tuple[str, ...]: + """Build a Podman command for one runtime service kind. + + Returns: + Podman command arguments for the runtime service. + + Raises: + ValueError: If the runtime service kind or configuration is unsupported. + """ + if runtime_service.kind == RuntimeServiceKind.POSTGRESQL.value: + if not runtime_service.connection_username or not runtime_service.connection_database: + msg = "PostgreSQL runtime service requires connection credentials." 
+ raise ValueError(msg) + + return build_postgres_container_command( + PostgresContainerConfig( + container_name=runtime_service.container_name, + network_name=runtime_service.network_name, + hostname=runtime_service.hostname, + username=runtime_service.connection_username, + database_name=runtime_service.connection_database, + data_directory=data_directory, + password_file=password_file, + pod_name=runtime_service.network_name, + image_reference=runtime_service.image_reference, + ), + ) + + if runtime_service.kind == RuntimeServiceKind.REDIS.value: + return build_redis_container_command( + RedisContainerConfig( + container_name=runtime_service.container_name, + network_name=runtime_service.network_name, + hostname=runtime_service.hostname, + data_directory=data_directory, + password_file=password_file, + pod_name=runtime_service.network_name, + image_reference=runtime_service.image_reference, + ), + ) + + msg = f"Unsupported runtime service kind: {runtime_service.kind}" + raise ValueError(msg) + + +def _provision_runtime_service_container(runtime_service: RuntimeService) -> None: + """Create or replace a local test container for one runtime service.""" + service_root = _runtime_service_root(runtime_service) + data_directory = service_root / "data" + password_file = service_root / "secrets" / "password" + + data_directory.mkdir(parents=True, exist_ok=True) + _ensure_secret_file(password_file) + _ensure_podman_pod( + pod_name=runtime_service.network_name, + host_port=runtime_service.deployment.guest_port, + ) + + command = _build_runtime_service_command( + runtime_service, + data_directory=data_directory, + password_file=password_file, + ) + run_host_command(command=command) + _wait_for_container_ready(runtime_service) + + +@shared_task( + bind=True, + autoretry_for=(HostCommandError, TimeoutError), + retry_backoff=True, + retry_backoff_max=300, + retry_jitter=True, + max_retries=5, +) +def provision_test_runtime_services(self: BoundControlPlaneTask, deployment_id: 
str) -> str: + """Seed and provision runtime service test containers for one deployment. + + Returns: + Final runtime service status for the processed deployment. + + Raises: + HostCommandError: If Podman commands fail while provisioning backing services. + RuntimeError: If a backing container exits or becomes unhealthy during startup. + TimeoutError: If a backing container never becomes healthy. + ValueError: If runtime service configuration is invalid. + """ + del self + deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id) + if deployment.status in TERMINAL_DEPLOYMENT_STATES: + return deployment.status + + deployment.ensure_test_runtime_services() + runtime_services = tuple( + RuntimeService.objects + .select_related("deployment__hosted_site__tenant") + .filter(deployment=deployment) + .order_by("kind"), + ) + pending_runtime_services = tuple( + runtime_service + for runtime_service in runtime_services + if runtime_service.status not in TERMINAL_RUNTIME_SERVICE_STATES + and runtime_service.status != RuntimeServiceStatus.READY.value + ) + if not pending_runtime_services: + return RuntimeServiceStatus.READY.value + + for runtime_service in pending_runtime_services: + runtime_service.status = RuntimeServiceStatus.PROVISIONING.value + runtime_service.save(update_fields=["status", "updated_at"]) + + try: + _provision_runtime_service_container(runtime_service) + except (HostCommandError, RuntimeError, TimeoutError): + runtime_service.status = RuntimeServiceStatus.FAILED.value + runtime_service.save(update_fields=["status", "updated_at"]) + _capture_test_deployment_diagnostics_snapshot(deployment_id) + logger.exception( + "Runtime service provisioning failed deployment_id=%s runtime_service_id=%s kind=%s", + deployment_id, + runtime_service.id, + runtime_service.kind, + ) + raise + except ValueError: + runtime_service.status = RuntimeServiceStatus.FAILED.value + runtime_service.save(update_fields=["status", "updated_at"]) + 
logger.exception( + "Runtime service configuration invalid deployment_id=%s runtime_service_id=%s kind=%s", + deployment_id, + runtime_service.id, + runtime_service.kind, + ) + raise + + runtime_service.status = RuntimeServiceStatus.READY.value + runtime_service.save(update_fields=["status", "updated_at"]) + + _capture_test_deployment_diagnostics_snapshot(deployment_id) + return RuntimeServiceStatus.READY.value + + +@shared_task( + bind=True, + retry_backoff=True, + retry_backoff_max=300, + retry_jitter=True, + max_retries=5, +) +def mark_deployment_provisioning(self: BoundControlPlaneTask, deployment_id: str) -> str: + """Move a deployment into provisioning state in an idempotent way. + + Returns: + The deployment status after the transition attempt. + """ + del self + with transaction.atomic(): + deployment: Deployment = Deployment.objects.select_for_update().get(pk=deployment_id) + if deployment.status in TERMINAL_DEPLOYMENT_STATES: + return deployment.status + if deployment.status == DeploymentStatus.PROVISIONING.value: + return deployment.status + + deployment.status = DeploymentStatus.PROVISIONING.value + deployment.last_error = "" + deployment.save(update_fields=["status", "last_error", "updated_at"]) + return deployment.status + + +@shared_task( + bind=True, + retry_backoff=True, + retry_backoff_max=300, + retry_jitter=True, + max_retries=5, +) +def mark_deployment_booting(self: BoundControlPlaneTask, deployment_id: str) -> str: + """Move a deployment into booting state in an idempotent way. + + Returns: + The deployment status after the transition attempt. 
+ """ + del self + with transaction.atomic(): + deployment: Deployment = Deployment.objects.select_for_update().get(pk=deployment_id) + if deployment.status in TERMINAL_DEPLOYMENT_STATES: + return deployment.status + if deployment.status == DeploymentStatus.BOOTING.value: + return deployment.status + + deployment.status = DeploymentStatus.BOOTING.value + deployment.save(update_fields=["status", "updated_at"]) + return deployment.status + + +@shared_task(bind=True, max_retries=5) +def provision_test_django_runtime(self: BoundControlPlaneTask, deployment_id: str) -> str: + """Build and run a generated Django test app against ready PostgreSQL and Redis containers. + + Returns: + Final deployment status for the processed deployment. + + Raises: + ValueError: If required backing services are not ready. + """ + try: + return run_test_django_runtime_provisioning(deployment_id) + except ValueError as error: + _mark_deployment_failed(deployment_id=deployment_id, message=str(error)) + logger.exception("Django runtime configuration invalid deployment_id=%s", deployment_id) + raise + except (HostCommandError, TimeoutError) as error: + deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id) + _retry_or_fail_django_runtime(self, deployment=deployment, error=error) diff --git a/control_plane/templates/control_plane/base.html b/control_plane/templates/control_plane/base.html new file mode 100644 index 0000000..e776e9e --- /dev/null +++ b/control_plane/templates/control_plane/base.html @@ -0,0 +1,35 @@ +{% load static %} + + +
+ + + + +Tussilago Local Runtime
+Control Plane Overview
++ Recent deployments, backing-service states, direct sentinel links, and fast paths into detailed logs. +
+{{ card.deployment.hosted_site.tenant.slug }}
+{{ card.deployment.last_error }}
+
+ Run uv run python manage.py create_test_deployment to populate this dashboard.
+
Deployment Detail
+Deployment {{ deployment.id }} on localhost port {{ deployment.guest_port }}.
+Live Health
++ {% if health_probe.error %} + {{ health_probe.error }} + {% elif health_probe.ok %} + Sentinel responded with healthy payload. + {% else %} + Waiting for a healthy sentinel response. + {% endif %} +
+{% if health_probe.payload %}{{ health_probe.payload }}{% elif health_probe.error %}{{ health_probe.error }}{% else %}No payload yet.{% endif %}
+ Facts
+{{ deployment.last_error }}
+ Runtime Services
+{{ runtime_service.hostname }}:{{ runtime_service.internal_port }}
+Control plane status: {{ runtime_service.status }}
+No runtime services recorded yet.
+ {% endfor %} +Diagnostics Snapshot
+{{ diagnostics.pod.error }}
{% endif %} + {% else %} +No diagnostics snapshot has been captured yet.
+ {% endif %} +Django Container
+{{ diagnostics.django.inspect_error }}
{% endif %} +{{ diagnostics.django.logs|default:'No Django logs captured yet.' }}
+ {{ runtime_service.label }}
+{{ runtime_service.inspect_error }}
{% endif %} + {% if runtime_service.log_error %}{{ runtime_service.log_error }}
{% endif %} +{{ runtime_service.logs|default:'No logs captured yet.' }}
+