Add JSON and CSV functionality to database backup command

Also add Kick to dataset
2026-03-17 00:38:01 +01:00 · 2026-03-17 00:07:22 +01:00
3 changed files with 238 additions and 12 deletions
--- a/core/views.py
+++ b/core/views.py
@ -645,8 +645,8 @@ def dataset_backups_view(request: HttpRequest) -> HttpResponse:
    datasets.sort(key=operator.itemgetter("updated_at"), reverse=True)

    seo_context: dict[str, Any] = _build_seo_context(
-        page_title="Twitch Dataset",
-        page_description="Database backups and datasets available for download.",
+        page_title="Twitch/Kick drop data",
+        page_description="Twitch/Kick datasets available for download, including historical drop campaign data and more.",
    )
    context: dict[str, Any] = {
        "datasets": datasets,
--- a/twitch/management/commands/backup_db.py
+++ b/twitch/management/commands/backup_db.py
@ -1,4 +1,6 @@
+import csv
 import io
+import json
 import os
 import shutil
 import subprocess  # noqa: S404
@ -19,9 +21,9 @@ if TYPE_CHECKING:


 class Command(BaseCommand):
-    """Create a compressed SQL dump of the Twitch dataset tables."""
+    """Create a compressed SQL dump of the Twitch and Kick dataset tables."""

-    help = "Create a compressed SQL dump of the Twitch dataset tables."
+    help = "Create a compressed SQL dump of the Twitch and Kick dataset tables."

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Define arguments for the backup command."""
@ -59,9 +61,14 @@ class Command(BaseCommand):
        timestamp: str = timezone.localtime(timezone.now()).strftime("%Y%m%d-%H%M%S")
        output_path: Path = output_dir / f"{prefix}-{timestamp}.sql.zst"

-        allowed_tables = _get_allowed_tables("twitch_")
+        allowed_tables = sorted({
+            *_get_allowed_tables("twitch_"),
+            *_get_allowed_tables("kick_"),
+        })
        if not allowed_tables:
-            self.stdout.write(self.style.WARNING("No twitch tables found to back up."))
+            self.stdout.write(
+                self.style.WARNING("No twitch or kick tables found to back up."),
+            )
            return

        if django_connection.vendor == "postgresql":
@ -77,6 +84,16 @@ class Command(BaseCommand):
            msg = f"Unsupported database backend: {django_connection.vendor}"
            raise CommandError(msg)

+        json_path: Path = output_dir / f"{prefix}-{timestamp}.json.zst"
+        _write_json_dump(json_path, allowed_tables)
+
+        csv_paths: list[Path] = _write_csv_dumps(
+            output_dir,
+            prefix,
+            timestamp,
+            allowed_tables,
+        )
+
        created_at: datetime = datetime.fromtimestamp(
            output_path.stat().st_mtime,
            tz=timezone.get_current_timezone(),
@ -86,6 +103,10 @@ class Command(BaseCommand):
                f"Backup created: {output_path} (updated {created_at.isoformat()})",
            ),
        )
+        self.stdout.write(self.style.SUCCESS(f"JSON backup created: {json_path}"))
+        self.stdout.write(
+            self.style.SUCCESS(f"CSV backups created: {len(csv_paths)} files"),
+        )
        self.stdout.write(self.style.SUCCESS(f"Included tables: {len(allowed_tables)}"))


@ -293,3 +314,77 @@ def _sql_literal(value: object) -> str:
    if isinstance(value, bytes):
        return "X'" + value.hex() + "'"
    return "'" + str(value).replace("'", "''") + "'"
+
+
+def _json_default(value: object) -> str:
+    """Convert non-serializable values to JSON-compatible strings.
+
+    Binary values are hex-encoded. Besides ``bytes`` this also covers
+    ``bytearray`` and ``memoryview``: their ``str()`` form is a repr such as
+    ``<memory at 0x...>`` that does not carry the stored data.
+    NOTE(review): whether the configured database driver ever returns buffer
+    objects for binary columns is not visible here - confirm.
+
+    Args:
+        value: Value that the JSON encoder could not serialize natively.
+
+    Returns:
+        Hex digits for binary values, otherwise ``str(value)``.
+    """
+    if isinstance(value, (bytes, bytearray, memoryview)):
+        # Normalise to bytes first so every buffer type shares one hex path.
+        return bytes(value).hex()
+    return str(value)
+
+
+def _write_json_dump(output_path: Path, tables: list[str]) -> None:
+    """Dump every listed table into one zstd-compressed JSON document.
+
+    The document is an object keyed by table name; each value is a list of
+    row objects mapping column name to column value. Values the JSON encoder
+    cannot handle are converted by ``_json_default``.
+
+    Args:
+        output_path: Destination path for the zstd file.
+        tables: Table names to include.
+    """
+    dump: dict[str, list[dict]] = {}
+    with django_connection.cursor() as cursor:
+        for table_name in tables:
+            cursor.execute(f'SELECT * FROM "{table_name}"')  # noqa: S608
+            # cursor.description holds one entry per column; item 0 is its name.
+            header: list[str] = [column[0] for column in cursor.description]
+            records = cursor.fetchall()
+            table_rows: list[dict] = []
+            for record in records:
+                table_rows.append(dict(zip(header, record, strict=False)))
+            dump[table_name] = table_rows
+
+    with (
+        output_path.open("wb") as raw_handle,
+        zstd.open(raw_handle, "w") as compressed,
+        io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+    ):
+        json.dump(dump, handle, default=_json_default)
+
+
+def _write_csv_dumps(
+    output_dir: Path,
+    prefix: str,
+    timestamp: str,
+    tables: list[str],
+) -> list[Path]:
+    """Write per-table CSV files into zstd-compressed files.
+
+    Each table becomes ``{prefix}-{timestamp}-{table}.csv.zst`` containing a
+    header row of column names followed by one row per record.
+
+    Args:
+        output_dir: Directory where CSV files will be written.
+        prefix: Filename prefix.
+        timestamp: Timestamp string for filenames.
+        tables: Table names to include.
+
+    Returns:
+        List of created file paths, in the same order as ``tables``.
+    """
+    paths: list[Path] = []
+    with django_connection.cursor() as cursor:
+        for table in tables:
+            cursor.execute(f'SELECT * FROM "{table}"')  # noqa: S608
+            columns: list[str] = [col[0] for col in cursor.description]
+            rows: list[tuple] = cursor.fetchall()
+
+            output_path: Path = output_dir / f"{prefix}-{timestamp}-{table}.csv.zst"
+            with (
+                output_path.open("wb") as raw_handle,
+                zstd.open(raw_handle, "w") as compressed,
+                # newline="" stops the text layer from translating line endings:
+                # the csv writer emits its own "\r\n" terminator, and newlines
+                # inside field values must reach the file unchanged.
+                io.TextIOWrapper(compressed, encoding="utf-8", newline="") as handle,
+            ):
+                writer: csv.Writer = csv.writer(handle)
+                writer.writerow(columns)
+                writer.writerows(rows)
+            paths.append(output_path)
+    return paths
--- a/twitch/tests/test_backup.py
+++ b/twitch/tests/test_backup.py
@ -1,8 +1,11 @@
+import csv
 import io
+import json
 import math
 import os
 import shutil
 from compression import zstd
+from datetime import datetime as dt
 from typing import TYPE_CHECKING

 import pytest
@ -12,13 +15,18 @@ from django.db import connection
 from django.urls import reverse

 from twitch.management.commands.backup_db import _get_allowed_tables
+from twitch.management.commands.backup_db import _json_default
 from twitch.management.commands.backup_db import _sql_literal
+from twitch.management.commands.backup_db import _write_csv_dumps
+from twitch.management.commands.backup_db import _write_json_dump
 from twitch.management.commands.backup_db import _write_postgres_dump
 from twitch.management.commands.backup_db import _write_sqlite_dump
 from twitch.models import Game
 from twitch.models import Organization

 if TYPE_CHECKING:
+    from csv import Reader
+    from datetime import datetime
    from pathlib import Path

    from django.test import Client
@ -84,34 +92,39 @@ class TestBackupCommand:
        assert "twitch_game" in content
        assert "Test Org" in content

-    def test_backup_excludes_non_twitch_tables(self, tmp_path: Path) -> None:
-        """Test that backup only includes twitch_ prefixed tables."""
+    def test_backup_excludes_non_app_tables(self, tmp_path: Path) -> None:
+        """Test that backup includes app tables and excludes non-app tables."""
        _skip_if_pg_dump_missing()
        # Create test data so tables exist
        Organization.objects.create(twitch_id="test001", name="Test Org")

-        output_dir = tmp_path / "backups"
+        output_dir: Path = tmp_path / "backups"
        output_dir.mkdir()

        call_command("backup_db", output_dir=str(output_dir), prefix="test")

-        backup_file = next(iter(output_dir.glob("test-*.sql.zst")))
+        backup_file: Path = next(iter(output_dir.glob("test-*.sql.zst")))

        with (
            backup_file.open("rb") as raw_handle,
            zstd.open(raw_handle, "r") as compressed,
            io.TextIOWrapper(compressed, encoding="utf-8") as handle,
        ):
-            content = handle.read()
+            content: str = handle.read()

        # Should NOT contain django admin, silk, or debug toolbar tables
        assert "django_session" not in content
+        assert "django_migrations" not in content
+        assert "django_content_type" not in content
        assert "silk_" not in content
        assert "debug_toolbar_" not in content
        assert "django_admin_log" not in content
+        assert "auth_" not in content
+        assert "youtube_" not in content

-        # Should contain twitch tables
+        # Should contain twitch and kick tables
        assert "twitch_" in content
+        assert "kick_" in content

    def test_backup_with_custom_prefix(self, tmp_path: Path) -> None:
        """Test that custom prefix is used in filename."""
@ -159,6 +172,59 @@ class TestBackupCommand:
        backup_files = list(datasets_dir.glob("ttvdrops-*.sql.zst"))
        assert len(backup_files) >= 1

+    def test_backup_creates_json_file(self, tmp_path: Path) -> None:
+        """Check that running backup_db leaves exactly one readable JSON dump."""
+        _skip_if_pg_dump_missing()
+        Organization.objects.create(twitch_id="test_json", name="Test Org JSON")
+
+        backup_dir: Path = tmp_path / "backups"
+        backup_dir.mkdir()
+        call_command("backup_db", output_dir=str(backup_dir), prefix="test")
+
+        matches: list[Path] = list(backup_dir.glob("test-*.json.zst"))
+        assert len(matches) == 1
+
+        # Undo the layers in reverse: raw file -> zstd stream -> UTF-8 text.
+        with (
+            matches[0].open("rb") as raw_handle,
+            zstd.open(raw_handle, "r") as compressed,
+            io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+        ):
+            payload = json.load(handle)
+
+        assert isinstance(payload, dict)
+        assert "twitch_organization" in payload
+        organizations = payload["twitch_organization"]
+        assert any(entry.get("name") == "Test Org JSON" for entry in organizations)
+
+
+    def test_backup_creates_csv_files(self, tmp_path: Path) -> None:
+        """Test that backup command creates per-table CSV files alongside the SQL dump."""
+        _skip_if_pg_dump_missing()
+        Organization.objects.create(twitch_id="test_csv", name="Test Org CSV")
+
+        output_dir: Path = tmp_path / "backups"
+        output_dir.mkdir()
+
+        call_command("backup_db", output_dir=str(output_dir), prefix="test")
+
+        org_csv_files: list[Path] = list(
+            output_dir.glob("test-*-twitch_organization.csv.zst"),
+        )
+        assert len(org_csv_files) == 1
+
+        with (
+            org_csv_files[0].open("rb") as raw_handle,
+            zstd.open(raw_handle, "r") as compressed,
+            # newline="" hands line endings to csv.reader untouched, so
+            # newlines embedded in quoted fields are parsed as written.
+            io.TextIOWrapper(compressed, encoding="utf-8", newline="") as handle,
+        ):
+            reader: Reader = csv.reader(handle)
+            rows: list[list[str]] = list(reader)
+
+        assert len(rows) >= 2  # header + at least one data row
+        assert "name" in rows[0]
+        assert any("Test Org CSV" in row for row in rows[1:])
+
+

@pytest.mark.django_db
 class TestBackupHelperFunctions:
@ -245,6 +311,71 @@ class TestBackupHelperFunctions:
            assert "INSERT INTO" in content
            assert "Write Test Org" in content

+    def test_write_json_dump_creates_valid_json(self, tmp_path: Path) -> None:
+        """Check that _write_json_dump emits compressed JSON covering every table."""
+        Organization.objects.create(
+            twitch_id="test_json_helper",
+            name="JSON Helper Org",
+        )
+
+        table_names: list[str] = _get_allowed_tables("twitch_")
+        dump_path: Path = tmp_path / "backup.json.zst"
+        _write_json_dump(dump_path, table_names)
+
+        # Undo the layers in reverse: raw file -> zstd stream -> UTF-8 text.
+        with (
+            dump_path.open("rb") as raw_handle,
+            zstd.open(raw_handle, "r") as compressed,
+            io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+        ):
+            payload = json.load(handle)
+
+        assert isinstance(payload, dict)
+        assert "twitch_organization" in payload
+        assert all(name in payload for name in table_names)
+        organizations = payload["twitch_organization"]
+        assert any(entry.get("name") == "JSON Helper Org" for entry in organizations)
+
+
+    def test_write_csv_dumps_creates_per_table_files(self, tmp_path: Path) -> None:
+        """Test _write_csv_dumps creates one compressed CSV file per table."""
+        Organization.objects.create(twitch_id="test_csv_helper", name="CSV Helper Org")
+
+        tables: list[str] = _get_allowed_tables("twitch_")
+        paths: list[Path] = _write_csv_dumps(
+            tmp_path,
+            "test",
+            "20260317-120000",
+            tables,
+        )
+
+        assert len(paths) == len(tables)
+        assert all(p.exists() for p in paths)
+
+        org_csv: Path = tmp_path / "test-20260317-120000-twitch_organization.csv.zst"
+        assert org_csv.exists()
+
+        with (
+            org_csv.open("rb") as raw_handle,
+            zstd.open(raw_handle, "r") as compressed,
+            # newline="" hands line endings to csv.reader untouched, so
+            # newlines embedded in quoted fields are parsed as written.
+            io.TextIOWrapper(compressed, encoding="utf-8", newline="") as handle,
+        ):
+            reader: Reader = csv.reader(handle)
+            rows: list[list[str]] = list(reader)
+
+        assert len(rows) >= 2  # header + at least one data row
+        assert "name" in rows[0]
+        assert any("CSV Helper Org" in row for row in rows[1:])
+
+
+    def test_json_default_handles_bytes(self) -> None:
+        """Check that _json_default renders bytes as lowercase hex digits."""
+        expectations: tuple[tuple[bytes, str], ...] = (
+            (b"\x00\x01", "0001"),
+            (b"hello", "68656c6c6f"),
+        )
+        for raw, expected_hex in expectations:
+            assert _json_default(raw) == expected_hex
+
+
+    def test_json_default_handles_other_types(self) -> None:
+        """Test _json_default falls back to str() for other types."""
+        # A fixed UTC offset keeps the fixture independent of the host timezone,
+        # and the literal expectation pins the exact text written to the dump.
+        value: datetime = dt.fromisoformat("2026-03-17T12:00:00+01:00")
+        assert _json_default(value) == "2026-03-17 12:00:00+01:00"
+        assert _json_default(value) == str(value)
+
+

@pytest.mark.django_db
 class TestDatasetBackupViews:
Author	SHA1	Message	Date
Joakim Helleśen	9fd22ba8a8	Add JSON and CSV functionality to database backup command All checks were successful Deploy to Server / deploy (push) Successful in 40s Details	2026-03-17 00:38:01 +01:00
Joakim Helleśen	563266d8cc	Also add Kick to dataset	2026-03-17 00:07:22 +01:00