from __future__ import annotations import json from typing import TYPE_CHECKING from urllib.error import HTTPError from urllib.error import URLError from urllib.request import urlopen from django.utils import timezone from control_plane.host_commands import HostCommandError from control_plane.host_commands import run_host_command from control_plane.local_test_runtime import build_test_django_container_names from control_plane.local_test_runtime import build_test_django_local_url from control_plane.local_test_runtime import build_test_django_project_root from control_plane.models import Deployment from control_plane.models import DeploymentStatus if TYPE_CHECKING: from collections.abc import Iterable from pathlib import Path from control_plane.models import RuntimeService MAX_DIAGNOSTIC_LOG_LINES = 200 DEFAULT_SENTINEL_PROBE_TIMEOUT_SECONDS = 2.0 type JsonPrimitive = bool | int | float | str | None type JsonValue = JsonPrimitive | list[JsonValue] | dict[str, JsonValue] def build_test_deployment_diagnostics_root(deployment: Deployment) -> Path: """Return filesystem root for persisted deployment diagnostics.""" return build_test_django_project_root(deployment).parent / "diagnostics" def build_test_deployment_diagnostics_snapshot_path(deployment: Deployment) -> Path: """Return JSON snapshot path for one deployment's latest diagnostics.""" return build_test_deployment_diagnostics_root(deployment) / "snapshot.json" def capture_test_deployment_diagnostics(deployment_id: str) -> None: """Capture current pod, container, and log state for one deployment.""" deployment = ( Deployment.objects .select_related("hosted_site__tenant") .prefetch_related("runtime_services") .get(pk=deployment_id) ) snapshot_path = build_test_deployment_diagnostics_snapshot_path(deployment) snapshot_path.parent.mkdir(parents=True, exist_ok=True) snapshot_path.write_text( json.dumps(_build_diagnostics_snapshot(deployment), indent=2), encoding="utf-8", ) def load_test_deployment_diagnostics(deployment: Deployment) -> dict[str, JsonValue] | None: """Load the latest persisted diagnostics snapshot for one deployment. Returns: Parsed diagnostics payload, or None when no snapshot has been captured yet. """ snapshot_path = build_test_deployment_diagnostics_snapshot_path(deployment) if not snapshot_path.exists(): return None try: payload = json.loads(snapshot_path.read_text(encoding="utf-8")) except json.JSONDecodeError as error: return { "capture_error": f"Unable to parse diagnostics snapshot: {error}", "captured_at": None, } if not isinstance(payload, dict): return { "capture_error": "Diagnostics snapshot is not a JSON object.", "captured_at": None, } return payload def probe_test_deployment_health( deployment: Deployment, *, timeout_seconds: float = DEFAULT_SENTINEL_PROBE_TIMEOUT_SECONDS, ) -> dict[str, JsonValue]: """Probe the generated deployment sentinel endpoint and return structured status. Returns: JSON-serializable probe state describing current sentinel reachability and payload. """ sentinel_url = build_test_django_local_url(deployment) result: dict[str, JsonValue] = { "checked_at": timezone.now().isoformat(), "deployment_id": str(deployment.id), "deployment_status": deployment.status, "sentinel_url": sentinel_url, "ok": False, "status": "not-running", "label": "Not Running", "payload": None, "error": "", "http_status": None, } if deployment.status not in {DeploymentStatus.RUNNING.value, DeploymentStatus.BOOTING.value}: return result try: with urlopen(sentinel_url, timeout=timeout_seconds) as response: # noqa: S310 payload = json.loads(response.read().decode("utf-8")) result["http_status"] = int(getattr(response, "status", 200)) if isinstance(payload, dict): result["payload"] = payload if payload.get("status") == "ok": result["ok"] = True result["status"] = "healthy" result["label"] = "Healthy" else: result["status"] = "unexpected-payload" result["label"] = "Unexpected" else: result["payload"] = {"value": str(payload)} result["status"] = "unexpected-payload" result["label"] = "Unexpected" except (HTTPError, URLError, OSError, json.JSONDecodeError) as error: result["status"] = "unreachable" result["label"] = "Unreachable" result["error"] = str(error) return result def _build_diagnostics_snapshot(deployment: Deployment) -> dict[str, JsonValue]: runtime_services = tuple(_ordered_runtime_services(deployment.runtime_services.all())) server_container_name, _ = build_test_django_container_names(deployment) pod_name = runtime_services[0].network_name if runtime_services else "" return { "captured_at": timezone.now().isoformat(), "deployment_id": str(deployment.id), "deployment_status": deployment.status, "tenant_slug": deployment.hosted_site.tenant.slug, "site_slug": deployment.hosted_site.slug, "guest_port": deployment.guest_port, "sentinel_url": build_test_django_local_url(deployment), "last_error": deployment.last_error, "pod": _collect_pod_diagnostics(pod_name), "django": _collect_container_diagnostics( container_name=server_container_name, control_plane_status=deployment.status, label="django", ), "runtime_services": [ _collect_container_diagnostics( container_name=runtime_service.container_name, control_plane_status=runtime_service.status, label=runtime_service.kind, ) for runtime_service in runtime_services ], } def _ordered_runtime_services(runtime_services: Iterable[RuntimeService]) -> tuple[RuntimeService, ...]: return tuple(sorted(runtime_services, key=lambda runtime_service: runtime_service.kind)) def _collect_pod_diagnostics(pod_name: str) -> dict[str, JsonValue]: if not pod_name: return { "name": "", "status": "missing", "error": "No runtime services are linked to this deployment yet.", } try: result = run_host_command( command=("podman", "pod", "inspect", "--format", "{{.State}}", pod_name), timeout_seconds=20.0, ) except HostCommandError as error: return { "name": pod_name, "status": "missing", "error": _format_host_command_error(error), } return { "name": pod_name, "status": result.stdout.strip() or "unknown", "error": "", } def _collect_container_diagnostics( *, container_name: str, control_plane_status: str, label: str, ) -> dict[str, JsonValue]: container_status, inspect_error = _inspect_container_status(container_name) logs, log_error = _read_container_logs(container_name) return { "label": label, "container_name": container_name, "control_plane_status": control_plane_status, "container_status": container_status, "logs": logs, "inspect_error": inspect_error, "log_error": log_error, } def _inspect_container_status(container_name: str) -> tuple[str, str]: try: result = run_host_command( command=( "podman", "inspect", "--format", "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}", container_name, ), timeout_seconds=20.0, ) except HostCommandError as error: return "missing", _format_host_command_error(error) return result.stdout.strip() or "unknown", "" def _read_container_logs(container_name: str) -> tuple[str, str]: try: result = run_host_command( command=("podman", "logs", "--tail", str(MAX_DIAGNOSTIC_LOG_LINES), container_name), timeout_seconds=20.0, ) except HostCommandError as error: return "", _format_host_command_error(error) output = result.stdout.strip() or result.stderr.strip() return output, "" def _format_host_command_error(error: HostCommandError) -> str: if error.stderr.strip(): return error.stderr.strip() if error.stdout.strip(): return error.stdout.strip() return str(error)