WIP
This commit is contained in:
parent
e70a0584c9
commit
a7a5b5c8ea
43 changed files with 5531 additions and 9 deletions
656
control_plane/tasks.py
Normal file
656
control_plane/tasks.py
Normal file
|
|
@ -0,0 +1,656 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import secrets
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import NoReturn
|
||||
from urllib.request import urlopen
|
||||
|
||||
from celery import shared_task
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
from control_plane.host_commands import HostCommandError
|
||||
from control_plane.host_commands import run_host_command
|
||||
from control_plane.local_test_runtime import TEST_DJANGO_CONTAINER_PORT
|
||||
from control_plane.local_test_runtime import TEST_DJANGO_WORKDIR
|
||||
from control_plane.local_test_runtime import build_test_django_container_context_path
|
||||
from control_plane.local_test_runtime import build_test_django_container_labels
|
||||
from control_plane.local_test_runtime import build_test_django_container_names
|
||||
from control_plane.local_test_runtime import build_test_django_containerfile_path
|
||||
from control_plane.local_test_runtime import build_test_django_environment
|
||||
from control_plane.local_test_runtime import build_test_django_image_reference
|
||||
from control_plane.local_test_runtime import build_test_django_local_url
|
||||
from control_plane.local_test_runtime import build_test_django_secret_mounts
|
||||
from control_plane.local_test_runtime import write_test_django_project
|
||||
from control_plane.models import Deployment
|
||||
from control_plane.models import DeploymentStatus
|
||||
from control_plane.models import RuntimeService
|
||||
from control_plane.models import RuntimeServiceKind
|
||||
from control_plane.models import RuntimeServiceStatus
|
||||
from control_plane.observability import capture_test_deployment_diagnostics
|
||||
from control_plane.runtime_plans import DjangoApplicationLaunchConfig
|
||||
from control_plane.runtime_plans import DjangoContainerImageBuildConfig
|
||||
from control_plane.runtime_plans import DjangoContainerRuntimeConfig
|
||||
from control_plane.runtime_plans import PostgresContainerConfig
|
||||
from control_plane.runtime_plans import RedisContainerConfig
|
||||
from control_plane.runtime_plans import build_django_container_image_command
|
||||
from control_plane.runtime_plans import build_django_container_run_command
|
||||
from control_plane.runtime_plans import build_django_migrate_command
|
||||
from control_plane.runtime_plans import build_django_server_command
|
||||
from control_plane.runtime_plans import build_postgres_container_command
|
||||
from control_plane.runtime_plans import build_redis_container_command
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from celery.app.task import Task
|
||||
|
||||
type BoundControlPlaneTask = Task[..., str]
|
||||
|
||||
|
||||
# Module logger; the name is a fixed string rather than derived from ``__name__``.
logger = logging.getLogger("tussilago.control_plane.tasks")

# Default polling budgets, in seconds, used as ``timeout_seconds`` defaults by the
# HTTP and container readiness helpers in this module.
DEFAULT_HTTP_READY_TIMEOUT_SECONDS = 45.0
DEFAULT_CONTAINER_READY_TIMEOUT_SECONDS = 45.0

# Deployment statuses after which the tasks in this module return early instead of
# transitioning the deployment any further.
TERMINAL_DEPLOYMENT_STATES: frozenset[str] = frozenset(
    {
        DeploymentStatus.DESTROYED.value,
        DeploymentStatus.FAILED.value,
    },
)

# Runtime service statuses that are excluded when selecting services to provision.
TERMINAL_RUNTIME_SERVICE_STATES: frozenset[str] = frozenset(
    {
        RuntimeServiceStatus.DESTROYING.value,
        RuntimeServiceStatus.DESTROYED.value,
    },
)
|
||||
|
||||
|
||||
def _runtime_service_root(runtime_service: RuntimeService) -> Path:
    """Locate the directory that holds one runtime service's local test artifacts.

    The layout is ``<DATA_DIR>/runtime-services/<deployment id>/<service kind>``.
    """
    deployment_directory = Path(settings.DATA_DIR) / "runtime-services" / str(runtime_service.deployment_id)
    return deployment_directory / runtime_service.kind
|
||||
|
||||
|
||||
def _mark_deployment_failed(*, deployment_id: str, message: str) -> None:
    """Record a failure on the deployment row together with its latest error text.

    The row is fetched with ``select_for_update`` inside a transaction, stamped with
    the failed status, the supplied message, and the current time, then saved.
    """
    with transaction.atomic():
        failed_deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        failed_deployment.finished_at = timezone.now()
        failed_deployment.last_error = message
        failed_deployment.status = DeploymentStatus.FAILED.value
        failed_deployment.save(update_fields=["status", "last_error", "finished_at", "updated_at"])
|
||||
|
||||
|
||||
def _capture_test_deployment_diagnostics_snapshot(deployment_id: str) -> None:
    """Capture a diagnostics snapshot on a best-effort basis.

    Filesystem errors, invalid snapshot state, and a missing deployment row are
    logged with their traceback and swallowed so that the surrounding deployment
    flow is never interrupted by diagnostics problems.
    """
    try:
        capture_test_deployment_diagnostics(deployment_id)
    except (OSError, ValueError, Deployment.DoesNotExist) as snapshot_error:
        # Pick the log line by failure category, checked in the same order as the
        # categories are listed above.
        if isinstance(snapshot_error, OSError):
            template = "Failed to write diagnostics snapshot deployment_id=%s"
        elif isinstance(snapshot_error, ValueError):
            template = "Invalid diagnostics snapshot state deployment_id=%s"
        else:
            template = "Diagnostics snapshot skipped for missing deployment_id=%s"
        logger.exception(template, deployment_id)
|
||||
|
||||
|
||||
def _ensure_test_django_image_exists(image_reference: str) -> None:
    """Make sure the reusable Django test image is available locally, building it if needed.

    Raises:
        HostCommandError: If Podman image inspection or build fails.
    """
    image_probe = ("podman", "image", "exists", image_reference)
    try:
        run_host_command(command=image_probe)
    except HostCommandError as probe_error:
        # Only exit status 1 is treated as "image missing"; anything else is a real failure.
        if probe_error.returncode != 1:
            raise

        build_config = DjangoContainerImageBuildConfig(
            image_reference=image_reference,
            containerfile_path=build_test_django_containerfile_path(),
            context_directory=build_test_django_container_context_path(),
        )
        run_host_command(
            command=build_django_container_image_command(build_config),
            timeout_seconds=300.0,
        )
|
||||
|
||||
|
||||
def _read_container_logs(container_name: str) -> str:
    """Fetch container logs for failure reports, or an empty string if Podman cannot provide them.

    Stripped stdout is preferred; stripped stderr is used when stdout is empty.
    """
    try:
        log_result = run_host_command(command=("podman", "logs", container_name))
    except HostCommandError:
        return ""

    captured_stdout = log_result.stdout.strip()
    if captured_stdout:
        return captured_stdout
    return log_result.stderr.strip()
|
||||
|
||||
|
||||
def _read_container_status(container_name: str) -> str:
    """Ask Podman for one container's current status.

    The inspect template yields the health-check status when the container has
    health state and the plain container state otherwise.
    """
    status_template = "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}"
    inspect_command = ("podman", "inspect", "--format", status_template, container_name)
    inspection = run_host_command(command=inspect_command)
    return inspection.stdout.strip()
|
||||
|
||||
|
||||
def _wait_for_container_ready(
    runtime_service: RuntimeService,
    *,
    timeout_seconds: float = DEFAULT_CONTAINER_READY_TIMEOUT_SECONDS,
) -> None:
    """Poll the container status once per second until it reports ``healthy``.

    Raises:
        RuntimeError: If the runtime service exits or becomes unhealthy before it is ready.
        TimeoutError: If the runtime service does not become ready before timeout.
    """
    failure_states = {"exited", "dead", "stopped", "unhealthy"}
    give_up_at = time.monotonic() + timeout_seconds

    while time.monotonic() < give_up_at:
        observed = _read_container_status(runtime_service.container_name)
        if observed == "healthy":
            return
        if observed not in failure_states:
            # Still starting up; poll again shortly.
            time.sleep(1.0)
            continue

        # Attach container logs, when available, to make the failure actionable.
        captured_logs = _read_container_logs(runtime_service.container_name)
        report = [f"Runtime service {runtime_service.kind} failed to become ready: {observed}."]
        if captured_logs:
            report.append(captured_logs)
        raise RuntimeError("\n".join(report))

    msg = f"Timed out waiting for runtime service {runtime_service.kind} to become healthy."
    raise TimeoutError(msg)
|
||||
|
||||
|
||||
def _wait_for_http_ready(
    url: str,
    *,
    timeout_seconds: float = DEFAULT_HTTP_READY_TIMEOUT_SECONDS,
) -> dict[str, str | int]:
    """Poll a sentinel endpoint until it answers with a JSON object whose ``status`` is ``"ok"``.

    The endpoint is polled once per second. Connection failures, truncated or
    malformed HTTP responses, bytes that are not valid UTF-8, invalid JSON, and JSON
    documents that are not objects are all treated as "not ready yet" instead of
    aborting the wait. A ``ValueError`` raised by ``urlopen`` itself (for example a
    malformed URL) still propagates.

    Returns:
        Parsed JSON response from the sentinel endpoint.

    Raises:
        TimeoutError: If the endpoint does not become healthy before timeout. The most
            recent transport or decoding error, if any, is attached as the cause.
    """
    # Function-scope import keeps the block self-contained; ``http.client`` is standard library.
    from http.client import HTTPException  # noqa: PLC0415

    deadline = time.monotonic() + timeout_seconds
    last_error: Exception | None = None
    while time.monotonic() < deadline:
        try:
            with urlopen(url, timeout=2) as response:  # noqa: S310
                payload = json.loads(response.read().decode("utf-8"))
        except (OSError, HTTPException, json.JSONDecodeError, UnicodeDecodeError) as error:
            # A half-started server can refuse connections, cut responses short, or
            # send non-JSON bytes; remember the error and keep polling.
            last_error = error
        else:
            # Guard against valid JSON that is not an object (list, string, number).
            if isinstance(payload, dict) and payload.get("status") == "ok":
                return payload

        time.sleep(1.0)

    msg = f"Timed out waiting for healthy Django sentinel endpoint at {url}"
    raise TimeoutError(msg) from last_error
|
||||
|
||||
|
||||
def _build_django_runtime_services(deployment: Deployment) -> tuple[RuntimeService, ...]:
    """Load every runtime service row of one deployment, ordered by kind.

    The deployment, hosted site, and tenant relations are joined in the same query.
    """
    services_for_deployment = RuntimeService.objects.select_related("deployment__hosted_site__tenant").filter(
        deployment=deployment,
    )
    return tuple(services_for_deployment.order_by("kind"))
|
||||
|
||||
|
||||
def _get_ready_django_runtime_services(deployment: Deployment) -> tuple[RuntimeService, ...]:
    """Fetch the deployment's runtime services and insist that every one of them is ready.

    Raises:
        ValueError: If there are no runtime services or any of them is not ready.
    """
    candidates = _build_django_runtime_services(deployment)
    ready_status = RuntimeServiceStatus.READY.value
    if candidates and all(candidate.status == ready_status for candidate in candidates):
        return candidates

    msg = "All runtime services must be ready before provisioning the Django test runtime."
    raise ValueError(msg)
|
||||
|
||||
|
||||
def _build_django_runtime_configs(
    deployment: Deployment,
    runtime_services: tuple[RuntimeService, ...],
    *,
    project_root: Path,
) -> tuple[str, DjangoContainerRuntimeConfig, DjangoContainerRuntimeConfig]:
    """Assemble the image reference and the two Podman configs for one deployment.

    The migrate and server containers share image, pod, network, working directory,
    environment, secret mounts, and labels; they differ in container name and
    hostname, and only the server config carries the port mapping.

    Returns:
        Image reference plus migrate and server Podman runtime configs.
    """
    image_reference = build_test_django_image_reference()
    environment = build_test_django_environment(deployment, runtime_services)
    secret_mounts = build_test_django_secret_mounts(runtime_services)
    labels = build_test_django_container_labels(deployment)
    server_container_name, migrate_container_name = build_test_django_container_names(deployment)
    # The first service's network name is used for both the network and the pod.
    network_name = runtime_services[0].network_name

    shared_settings = {
        "network_name": network_name,
        "image_reference": image_reference,
        "application_directory": project_root,
        "pod_name": network_name,
        "working_directory": TEST_DJANGO_WORKDIR,
        "environment": environment,
        "secret_mounts": secret_mounts,
        "labels": labels,
    }
    migrate_config = DjangoContainerRuntimeConfig(
        container_name=migrate_container_name,
        hostname="django-migrate.internal",
        **shared_settings,
    )
    server_config = DjangoContainerRuntimeConfig(
        container_name=server_container_name,
        hostname="django.internal",
        host_port=deployment.guest_port,
        guest_port=TEST_DJANGO_CONTAINER_PORT,
        **shared_settings,
    )
    return image_reference, migrate_config, server_config
|
||||
|
||||
|
||||
def _launch_django_runtime(
    deployment: Deployment,
    *,
    image_reference: str,
    migrate_config: DjangoContainerRuntimeConfig,
    server_config: DjangoContainerRuntimeConfig,
) -> dict[str, str | int]:
    """Ensure the image, run migrations in the foreground, start the server, and await readiness.

    Returns:
        Parsed JSON sentinel payload from the running Django test app.
    """
    container_python = Path("/usr/local/bin/python")
    _ensure_test_django_image_exists(image_reference)

    # Migrations run in a foreground container that is removed afterwards.
    migrate_command = build_django_migrate_command(python_executable=container_python)
    migrate_invocation = build_django_container_run_command(
        migrate_config,
        command=migrate_command,
        detach=False,
        remove=True,
    )
    run_host_command(command=migrate_invocation, timeout_seconds=120.0)

    # The server container is started detached and then polled over HTTP.
    launch_config = DjangoApplicationLaunchConfig(
        wsgi_module=deployment.hosted_site.wsgi_module,
        bind_host="0.0.0.0",  # noqa: S104
        port=TEST_DJANGO_CONTAINER_PORT,
        workers=1,
        python_executable=container_python,
    )
    server_command = build_django_server_command(launch_config)
    server_invocation = build_django_container_run_command(
        server_config,
        command=server_command,
        detach=True,
    )
    run_host_command(command=server_invocation, timeout_seconds=120.0)

    sentinel_url = build_test_django_local_url(deployment)
    return _wait_for_http_ready(sentinel_url)
|
||||
|
||||
|
||||
def _retry_or_fail_django_runtime(
    self: BoundControlPlaneTask,
    *,
    deployment: Deployment,
    error: HostCommandError | TimeoutError,
) -> NoReturn:
    """Schedule another attempt, or record the failure once the retry budget is spent.

    This function never returns: it raises either the Celery retry signal or the
    original error.
    """
    attempts_so_far = getattr(self.request, "retries", 0)
    logger.warning(
        "Django runtime provisioning retry deployment_id=%s retries=%s error=%s",
        deployment.id,
        attempts_so_far,
        error,
    )

    if attempts_so_far < self.max_retries:
        # Delay doubles per attempt (2, 4, 8, ... seconds) and is capped at 300 seconds.
        countdown = min(300, 2 ** (attempts_so_far + 1))
        raise self.retry(exc=error, countdown=countdown) from error

    # Retry budget exhausted: persist the error, enriched with server logs if any.
    server_container_name, _ = build_test_django_container_names(deployment)
    server_logs = _read_container_logs(server_container_name)
    failure_message = f"{error}\n{server_logs}" if server_logs else str(error)
    _mark_deployment_failed(deployment_id=str(deployment.id), message=failure_message)
    _capture_test_deployment_diagnostics_snapshot(str(deployment.id))
    logger.error("Django runtime provisioning failed deployment_id=%s", deployment.id)
    raise error
|
||||
|
||||
|
||||
def run_test_django_runtime_provisioning(deployment_id: str) -> str:
    """Provision the generated Django runtime for one deployment synchronously.

    Deployments that are already running or in a terminal state are returned
    untouched. Otherwise the test project is written, the containers are launched,
    and the deployment is marked running once the sentinel endpoint responds.

    Returns:
        Final deployment status for the processed deployment.
    """
    deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
    settled_states = {*TERMINAL_DEPLOYMENT_STATES, DeploymentStatus.RUNNING.value}
    if deployment.status in settled_states:
        return deployment.status

    runtime_services = _get_ready_django_runtime_services(deployment)
    project_root = write_test_django_project(deployment, runtime_services)
    image_reference, migrate_config, server_config = _build_django_runtime_configs(
        deployment,
        runtime_services,
        project_root=project_root,
    )
    sentinel_payload = _launch_django_runtime(
        deployment,
        image_reference=image_reference,
        migrate_config=migrate_config,
        server_config=server_config,
    )

    with transaction.atomic():
        # Re-read under a row lock: the status may have changed while containers were starting.
        deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        if deployment.status in TERMINAL_DEPLOYMENT_STATES:
            return deployment.status

        deployment.finished_at = None
        deployment.started_at = timezone.now()
        deployment.last_error = ""
        deployment.status = DeploymentStatus.RUNNING.value
        deployment.save(update_fields=["status", "last_error", "started_at", "finished_at", "updated_at"])

    _capture_test_deployment_diagnostics_snapshot(deployment_id)
    hosted_site = deployment.hosted_site
    logger.info(
        "Django runtime ready deployment_id=%s tenant_slug=%s site_slug=%s postgres=%s redis=%s",
        deployment_id,
        hosted_site.tenant.slug,
        hosted_site.slug,
        sentinel_payload.get("postgres"),
        sentinel_payload.get("redis"),
    )
    return DeploymentStatus.RUNNING.value
|
||||
|
||||
|
||||
def _ensure_secret_file(password_file: Path) -> None:
    """Write a reusable password file for a test container if one does not already exist.

    The file is created exclusively and with owner-only permissions from the start,
    so the generated secret is never on disk with wider permissions and two
    concurrent callers cannot both write it. An existing path is left untouched so
    the password stays stable across repeated provisioning runs.
    """
    # Function-scope import keeps the block self-contained; ``os`` is standard library.
    import os  # noqa: PLC0415

    password_file.parent.mkdir(parents=True, exist_ok=True)
    try:
        # O_EXCL turns "check, then create" into one atomic step, and the 0o600 mode
        # applies at creation time rather than after the secret has been written.
        descriptor = os.open(password_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
    except FileExistsError:
        return

    with os.fdopen(descriptor, "w", encoding="utf-8") as secret_stream:
        secret_stream.write(f"{secrets.token_urlsafe(24)}\n")

    # The creation mode is filtered by the process umask; pin the final mode explicitly.
    password_file.chmod(0o600)
|
||||
|
||||
|
||||
def _ensure_podman_pod(*, pod_name: str, host_port: int) -> None:
    """Make sure the named Podman pod exists, creating it when the existence probe reports it missing.

    A newly created pod publishes the Django container port on ``127.0.0.1:<host_port>``.

    Raises:
        HostCommandError: If Podman pod inspection or creation fails.
    """
    pod_probe = ("podman", "pod", "exists", pod_name)
    try:
        run_host_command(command=pod_probe)
    except HostCommandError as probe_error:
        # Only exit status 1 is treated as "pod missing"; anything else is a real failure.
        if probe_error.returncode != 1:
            raise

        published_port = f"127.0.0.1:{host_port}:{TEST_DJANGO_CONTAINER_PORT}"
        create_command = ("podman", "pod", "create", "--replace", "--name", pod_name, "--publish", published_port)
        run_host_command(command=create_command)
|
||||
|
||||
|
||||
def _build_runtime_service_command(
    runtime_service: RuntimeService,
    *,
    data_directory: Path,
    password_file: Path,
) -> tuple[str, ...]:
    """Translate one runtime service row into the Podman command that starts its container.

    Returns:
        Podman command arguments for the runtime service.

    Raises:
        ValueError: If the runtime service kind or configuration is unsupported.
    """
    kind = runtime_service.kind
    is_postgres = kind == RuntimeServiceKind.POSTGRESQL.value
    is_redis = kind == RuntimeServiceKind.REDIS.value

    if not is_postgres and not is_redis:
        msg = f"Unsupported runtime service kind: {kind}"
        raise ValueError(msg)

    if is_postgres and not (runtime_service.connection_username and runtime_service.connection_database):
        msg = "PostgreSQL runtime service requires connection credentials."
        raise ValueError(msg)

    # Settings that both container configs take; the network name doubles as the pod name.
    shared_settings = {
        "container_name": runtime_service.container_name,
        "network_name": runtime_service.network_name,
        "hostname": runtime_service.hostname,
        "data_directory": data_directory,
        "password_file": password_file,
        "pod_name": runtime_service.network_name,
        "image_reference": runtime_service.image_reference,
    }

    if is_postgres:
        postgres_config = PostgresContainerConfig(
            username=runtime_service.connection_username,
            database_name=runtime_service.connection_database,
            **shared_settings,
        )
        return build_postgres_container_command(postgres_config)

    return build_redis_container_command(RedisContainerConfig(**shared_settings))
|
||||
|
||||
|
||||
def _provision_runtime_service_container(runtime_service: RuntimeService) -> None:
    """Prepare on-disk state and the pod, then start one runtime service container and await readiness."""
    artifacts_root = _runtime_service_root(runtime_service)
    data_directory = artifacts_root / "data"
    password_file = artifacts_root / "secrets" / "password"

    # Filesystem and pod prerequisites come first, in this order.
    data_directory.mkdir(parents=True, exist_ok=True)
    _ensure_secret_file(password_file)
    _ensure_podman_pod(
        pod_name=runtime_service.network_name,
        host_port=runtime_service.deployment.guest_port,
    )

    start_command = _build_runtime_service_command(
        runtime_service,
        data_directory=data_directory,
        password_file=password_file,
    )
    run_host_command(command=start_command)
    _wait_for_container_ready(runtime_service)
|
||||
|
||||
|
||||
@shared_task(
    bind=True,
    autoretry_for=(HostCommandError, TimeoutError),
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def provision_test_runtime_services(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Seed and provision runtime service test containers for one deployment.

    Deployments in a terminal state are returned untouched. Services that are
    already ready, destroying, or destroyed are skipped; every other service is
    marked provisioning, started, and marked ready. Services are handled in order of
    kind, and the first failure marks that service failed and stops the run.

    Returns:
        The deployment status when the deployment is already terminal, otherwise the
        ready runtime service status.

    Raises:
        HostCommandError: If Podman commands fail while provisioning backing services.
        RuntimeError: If a backing container exits or becomes unhealthy during startup.
        TimeoutError: If a backing container never becomes healthy.
        ValueError: If runtime service configuration is invalid.
    """
    del self
    deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
    if deployment.status in TERMINAL_DEPLOYMENT_STATES:
        return deployment.status

    deployment.ensure_test_runtime_services()
    runtime_services = tuple(
        RuntimeService.objects
        .select_related("deployment__hosted_site__tenant")
        .filter(deployment=deployment)
        .order_by("kind"),
    )
    pending_runtime_services = tuple(
        runtime_service
        for runtime_service in runtime_services
        if runtime_service.status not in TERMINAL_RUNTIME_SERVICE_STATES
        and runtime_service.status != RuntimeServiceStatus.READY.value
    )
    if not pending_runtime_services:
        return RuntimeServiceStatus.READY.value

    for runtime_service in pending_runtime_services:
        runtime_service.status = RuntimeServiceStatus.PROVISIONING.value
        runtime_service.save(update_fields=["status", "updated_at"])

        try:
            _provision_runtime_service_container(runtime_service)
        # Parenthesized tuple: the bare comma-separated form only parses on Python 3.14+.
        except (HostCommandError, RuntimeError, TimeoutError):
            runtime_service.status = RuntimeServiceStatus.FAILED.value
            runtime_service.save(update_fields=["status", "updated_at"])
            _capture_test_deployment_diagnostics_snapshot(deployment_id)
            logger.exception(
                "Runtime service provisioning failed deployment_id=%s runtime_service_id=%s kind=%s",
                deployment_id,
                runtime_service.id,
                runtime_service.kind,
            )
            raise
        except ValueError:
            # Invalid configuration: record the failure and log it without a diagnostics snapshot.
            runtime_service.status = RuntimeServiceStatus.FAILED.value
            runtime_service.save(update_fields=["status", "updated_at"])
            logger.exception(
                "Runtime service configuration invalid deployment_id=%s runtime_service_id=%s kind=%s",
                deployment_id,
                runtime_service.id,
                runtime_service.kind,
            )
            raise

        runtime_service.status = RuntimeServiceStatus.READY.value
        runtime_service.save(update_fields=["status", "updated_at"])

    _capture_test_deployment_diagnostics_snapshot(deployment_id)
    return RuntimeServiceStatus.READY.value
|
||||
|
||||
|
||||
@shared_task(
    bind=True,
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def mark_deployment_provisioning(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Transition a deployment to the provisioning status, doing nothing if that is not applicable.

    Terminal deployments and deployments that are already provisioning are left
    untouched. Otherwise the status is updated and the last error is cleared.

    Returns:
        The deployment status after the transition attempt.
    """
    del self
    provisioning_status = DeploymentStatus.PROVISIONING.value
    with transaction.atomic():
        deployment: Deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        current_status = deployment.status
        if current_status in TERMINAL_DEPLOYMENT_STATES or current_status == provisioning_status:
            return current_status

        deployment.last_error = ""
        deployment.status = provisioning_status
        deployment.save(update_fields=["status", "last_error", "updated_at"])
        return deployment.status
|
||||
|
||||
|
||||
@shared_task(
    bind=True,
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def mark_deployment_booting(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Transition a deployment to the booting status, doing nothing if that is not applicable.

    Terminal deployments and deployments that are already booting are left untouched.

    Returns:
        The deployment status after the transition attempt.
    """
    del self
    booting_status = DeploymentStatus.BOOTING.value
    with transaction.atomic():
        deployment: Deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        current_status = deployment.status
        if current_status in TERMINAL_DEPLOYMENT_STATES or current_status == booting_status:
            return current_status

        deployment.status = booting_status
        deployment.save(update_fields=["status", "updated_at"])
        return deployment.status
|
||||
|
||||
|
||||
@shared_task(bind=True, max_retries=5)
def provision_test_django_runtime(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Celery entry point that provisions the generated Django test app for one deployment.

    Configuration problems mark the deployment failed immediately. Host command
    failures and timeouts are handed to the retry helper, which either schedules
    another attempt or records the failure.

    Returns:
        Final deployment status for the processed deployment.

    Raises:
        ValueError: If required backing services are not ready.
    """
    try:
        final_status = run_test_django_runtime_provisioning(deployment_id)
    except ValueError as config_error:
        _mark_deployment_failed(deployment_id=deployment_id, message=str(config_error))
        logger.exception("Django runtime configuration invalid deployment_id=%s", deployment_id)
        raise
    except (HostCommandError, TimeoutError) as transient_error:
        # Reload the deployment for the retry helper, which never returns.
        current_deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
        _retry_or_fail_django_runtime(self, deployment=current_deployment, error=transient_error)
    else:
        return final_status
|
||||
Loading…
Add table
Add a link
Reference in a new issue