Implement dataset functionality with views, URLs, and management command

2026-02-10 16:47:54 +01:00 · 2026-02-10 16:47:54 +01:00 · a12b34a665
commit a12b34a665
parent 45c581715a
12 changed files with 812 additions and 51 deletions
--- a/twitch/tests/test_backup.py
+++ b/twitch/tests/test_backup.py
@ -0,0 +1,451 @@
+from __future__ import annotations
+
+import io
+import math
+import os
+from compression import zstd
+from typing import TYPE_CHECKING
+
+import pytest
+from django.conf import settings
+from django.core.management import call_command
+from django.db import connection
+from django.urls import reverse
+
+from twitch.management.commands.backup_db import _get_allowed_tables
+from twitch.management.commands.backup_db import _sql_literal
+from twitch.management.commands.backup_db import _write_dump
+from twitch.models import Game
+from twitch.models import Organization
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from django.test import Client
+    from django.test.client import _MonkeyPatchedWSGIResponse
+
+
+@pytest.mark.django_db
+class TestBackupCommand:
+    """Tests for the backup_db management command."""
+
+    def test_backup_creates_file(self, tmp_path: Path) -> None:
+        """Test that backup command creates a zstd compressed file."""
+        # Create test data so tables exist
+        Organization.objects.create(twitch_id="test000", name="Test Org")
+
+        output_dir = tmp_path / "backups"
+        output_dir.mkdir()
+
+        call_command("backup_db", output_dir=str(output_dir), prefix="test")
+
+        backup_files = list(output_dir.glob("test-*.sql.zst"))
+        assert len(backup_files) == 1
+        assert backup_files[0].exists()
+        assert backup_files[0].stat().st_size > 0
+
+    def test_backup_contains_sql_content(self, tmp_path: Path) -> None:
+        """Test that backup file contains valid SQL content."""
+        output_dir = tmp_path / "backups"
+        output_dir.mkdir()
+
+        # Create some test data
+        org = Organization.objects.create(twitch_id="test123", name="Test Org")
+        game = Game.objects.create(twitch_id="game456", display_name="Test Game")
+        game.owners.add(org)
+
+        call_command("backup_db", output_dir=str(output_dir), prefix="test")
+
+        backup_file = next(iter(output_dir.glob("test-*.sql.zst")))
+
+        # Decompress and read content
+        with (
+            backup_file.open("rb") as raw_handle,
+            zstd.open(raw_handle, "r") as compressed,
+            io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+        ):
+            content = handle.read()
+
+        assert "PRAGMA foreign_keys=OFF;" in content
+        assert "BEGIN TRANSACTION;" in content
+        assert "COMMIT;" in content
+        assert "twitch_organization" in content
+        assert "twitch_game" in content
+        assert "Test Org" in content
+
+    def test_backup_excludes_non_twitch_tables(self, tmp_path: Path) -> None:
+        """Test that backup only includes twitch_ prefixed tables."""
+        # Create test data so tables exist
+        Organization.objects.create(twitch_id="test001", name="Test Org")
+
+        output_dir = tmp_path / "backups"
+        output_dir.mkdir()
+
+        call_command("backup_db", output_dir=str(output_dir), prefix="test")
+
+        backup_file = next(iter(output_dir.glob("test-*.sql.zst")))
+
+        with (
+            backup_file.open("rb") as raw_handle,
+            zstd.open(raw_handle, "r") as compressed,
+            io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+        ):
+            content = handle.read()
+
+        # Should NOT contain django admin, silk, or debug toolbar tables
+        assert "django_session" not in content
+        assert "silk_" not in content
+        assert "debug_toolbar_" not in content
+        assert "django_admin_log" not in content
+
+        # Should contain twitch tables
+        assert "twitch_" in content
+
+    def test_backup_with_custom_prefix(self, tmp_path: Path) -> None:
+        """Test that custom prefix is used in filename."""
+        # Create test data so tables exist
+        Organization.objects.create(twitch_id="test002", name="Test Org")
+
+        output_dir = tmp_path / "backups"
+        output_dir.mkdir()
+
+        call_command("backup_db", output_dir=str(output_dir), prefix="custom")
+
+        backup_files = list(output_dir.glob("custom-*.sql.zst"))
+        assert len(backup_files) == 1
+
+    def test_backup_creates_output_directory(self, tmp_path: Path) -> None:
+        """Test that backup command creates output directory if missing."""
+        # Create test data so tables exist
+        Organization.objects.create(twitch_id="test003", name="Test Org")
+
+        output_dir = tmp_path / "nonexistent" / "backups"
+
+        call_command("backup_db", output_dir=str(output_dir), prefix="test")
+
+        assert output_dir.exists()
+        assert len(list(output_dir.glob("test-*.sql.zst"))) == 1
+
+    def test_backup_uses_default_directory(self) -> None:
+        """Test that backup uses DATA_DIR/datasets by default."""
+        # Create test data so tables exist
+        Organization.objects.create(twitch_id="test004", name="Test Org")
+
+        datasets_dir = settings.DATA_DIR / "datasets"
+        datasets_dir.mkdir(exist_ok=True, parents=True)
+
+        # Clean up any existing test backups
+        for old_backup in datasets_dir.glob("ttvdrops-*.sql.zst"):
+            old_backup.unlink()
+
+        call_command("backup_db")
+
+        backup_files = list(datasets_dir.glob("ttvdrops-*.sql.zst"))
+        assert len(backup_files) >= 1
+
+        # Clean up
+        for backup in backup_files:
+            backup.unlink()
+
+
+@pytest.mark.django_db
+class TestBackupHelperFunctions:
+    """Tests for backup command helper functions."""
+
+    def test_get_allowed_tables_filters_by_prefix(self) -> None:
+        """Test that _get_allowed_tables returns only matching tables."""
+        # Use Django's connection to access the test database
+        db_connection = connection.connection
+        tables = _get_allowed_tables(db_connection, "twitch_")
+
+        assert len(tables) > 0
+        assert all(table.startswith("twitch_") for table in tables)
+        assert "twitch_organization" in tables
+        assert "twitch_game" in tables
+
+    def test_get_allowed_tables_excludes_non_matching(self) -> None:
+        """Test that _get_allowed_tables excludes non-matching tables."""
+        # Use Django's connection to access the test database
+        db_connection = connection.connection
+        tables = _get_allowed_tables(db_connection, "twitch_")
+
+        # Should not include django, silk, or debug toolbar tables
+        assert not any(table.startswith("django_") for table in tables)
+        assert not any(table.startswith("silk_") for table in tables)
+        assert not any(table.startswith("debug_toolbar_") for table in tables)
+
+    def test_sql_literal_handles_none(self) -> None:
+        """Test _sql_literal converts None to NULL."""
+        assert _sql_literal(None) == "NULL"
+
+    def test_sql_literal_handles_booleans(self) -> None:
+        """Test _sql_literal converts booleans to 1/0."""
+        assert _sql_literal(True) == "1"
+        assert _sql_literal(False) == "0"
+
+    def test_sql_literal_handles_numbers(self) -> None:
+        """Test _sql_literal handles int and float."""
+        assert _sql_literal(42) == "42"
+        assert _sql_literal(math.pi) == str(math.pi)
+
+    def test_sql_literal_handles_strings(self) -> None:
+        """Test _sql_literal quotes and escapes strings."""
+        assert _sql_literal("test") == "'test'"
+        assert _sql_literal("o'reilly") == "'o''reilly'"
+        assert _sql_literal("test\nline") == "'test\nline'"
+
+    def test_sql_literal_handles_bytes(self) -> None:
+        """Test _sql_literal converts bytes to hex notation."""
+        assert _sql_literal(b"\x00\x01\x02") == "X'000102'"
+        assert _sql_literal(b"hello") == "X'68656c6c6f'"
+
+    def test_write_dump_includes_schema_and_data(self) -> None:
+        """Test _write_dump writes complete SQL dump."""
+        # Create test data
+        Organization.objects.create(twitch_id="test789", name="Write Test Org")
+
+        # Use Django's connection to access the test database
+        db_connection = connection.connection
+        output = io.StringIO()
+
+        tables = _get_allowed_tables(db_connection, "twitch_")
+        _write_dump(output, db_connection, tables)
+
+        content = output.getvalue()
+
+        # Check for SQL structure
+        assert "PRAGMA foreign_keys=OFF;" in content
+        assert "BEGIN TRANSACTION;" in content
+        assert "COMMIT;" in content
+        assert "PRAGMA foreign_keys=ON;" in content
+
+        # Check for schema
+        assert "CREATE TABLE" in content
+        assert "twitch_organization" in content
+
+        # Check for data
+        assert "INSERT INTO" in content
+        assert "Write Test Org" in content
+
+
+@pytest.mark.django_db
+class TestDatasetBackupViews:
+    """Tests for dataset backup list and download views."""
+
+    @pytest.fixture
+    def datasets_dir(self, tmp_path: Path) -> Path:
+        """Create a temporary datasets directory.
+
+        Returns:
+            Path to the created datasets directory.
+        """
+        datasets_dir = tmp_path / "datasets"
+        datasets_dir.mkdir()
+        return datasets_dir
+
+    @pytest.fixture
+    def sample_backup(self, datasets_dir: Path) -> Path:
+        """Create a sample backup file.
+
+        Returns:
+            Path to the created backup file.
+        """
+        backup_file = datasets_dir / "ttvdrops-20260210-120000.sql.zst"
+        with (
+            backup_file.open("wb") as raw_handle,
+            zstd.open(raw_handle, "w") as compressed,
+            io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+        ):
+            handle.write("-- Sample backup content\n")
+        return backup_file
+
+    def test_dataset_list_view_shows_backups(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        sample_backup: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test that dataset list view displays backup files."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        response: _MonkeyPatchedWSGIResponse = client.get(reverse("twitch:dataset_backups"))
+
+        assert response.status_code == 200
+        assert b"ttvdrops-20260210-120000.sql.zst" in response.content
+        assert b"1 datasets" in response.content or b"1 dataset" in response.content
+
+    def test_dataset_list_view_empty_directory(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test dataset list view with empty directory."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        response: _MonkeyPatchedWSGIResponse = client.get(reverse("twitch:dataset_backups"))
+
+        assert response.status_code == 200
+        assert b"No dataset backups found" in response.content
+
+    def test_dataset_list_view_sorts_by_date(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test that backups are sorted by modification time."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        # Create multiple backup files with different timestamps
+        older_backup = datasets_dir / "ttvdrops-20260210-100000.sql.zst"
+        newer_backup = datasets_dir / "ttvdrops-20260210-140000.sql.zst"
+
+        for backup in [older_backup, newer_backup]:
+            with (
+                backup.open("wb") as raw_handle,
+                zstd.open(raw_handle, "w") as compressed,
+                io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+            ):
+                handle.write("-- Test\n")
+
+        # Set explicit modification times to ensure proper sorting
+        older_time = 1707561600  # 2024-02-10 10:00:00 UTC
+        newer_time = 1707575400  # 2024-02-10 14:00:00 UTC
+        os.utime(older_backup, (older_time, older_time))
+        os.utime(newer_backup, (newer_time, newer_time))
+
+        response: _MonkeyPatchedWSGIResponse = client.get(reverse("twitch:dataset_backups"))
+
+        content = response.content.decode()
+        newer_pos = content.find("20260210-140000")
+        older_pos = content.find("20260210-100000")
+
+        # Newer backup should appear first (sorted descending)
+        assert 0 < newer_pos < older_pos
+
+    def test_dataset_download_view_success(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        sample_backup: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test successful backup download."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        response: _MonkeyPatchedWSGIResponse = client.get(
+            reverse("twitch:dataset_backup_download", args=["ttvdrops-20260210-120000.sql.zst"]),
+        )
+
+        assert response.status_code == 200
+        # FileResponse may use application/x-compressed for .zst files
+        assert "attachment" in response["Content-Disposition"]
+        assert "ttvdrops-20260210-120000.sql.zst" in response["Content-Disposition"]
+
+    def test_dataset_download_prevents_path_traversal(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test that path traversal attempts are blocked."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        # Attempt path traversal
+        response = client.get(reverse("twitch:dataset_backup_download", args=["../../../etc/passwd"]))
+        assert response.status_code == 404
+
+    def test_dataset_download_rejects_invalid_extensions(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test that files with invalid extensions cannot be downloaded."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        # Create a file with invalid extension
+        invalid_file = datasets_dir / "malicious.exe"
+        invalid_file.write_text("not a backup")
+
+        response = client.get(reverse("twitch:dataset_backup_download", args=["malicious.exe"]))
+        assert response.status_code == 404
+
+    def test_dataset_download_file_not_found(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test download returns 404 for non-existent file."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        response = client.get(reverse("twitch:dataset_backup_download", args=["nonexistent.sql.zst"]))
+        assert response.status_code == 404
+
+    def test_dataset_list_view_shows_file_sizes(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        sample_backup: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test that file sizes are displayed in human-readable format."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        response: _MonkeyPatchedWSGIResponse = client.get(reverse("twitch:dataset_backups"))
+
+        assert response.status_code == 200
+        # Should contain size information (bytes, KB, MB, or GB)
+        content = response.content.decode()
+        assert any(unit in content for unit in ["bytes", "KB", "MB", "GB"])
+
+    def test_dataset_list_ignores_non_zst_files(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test that non-zst files are ignored in listing."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        # Create various file types
+        (datasets_dir / "backup.sql.zst").write_bytes(b"valid")
+        (datasets_dir / "readme.txt").write_text("should be ignored")
+        (datasets_dir / "old_backup.gz").write_bytes(b"should be ignored")
+
+        response: _MonkeyPatchedWSGIResponse = client.get(reverse("twitch:dataset_backups"))
+
+        content = response.content.decode()
+        assert "backup.sql.zst" in content
+        assert "readme.txt" not in content
+        assert "old_backup.gz" not in content
+
+    def test_dataset_download_view_handles_subdirectories(
+        self,
+        client: Client,
+        datasets_dir: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Test download works with files in subdirectories."""
+        monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
+
+        # Create subdirectory with backup
+        subdir = datasets_dir / "2026" / "02"
+        subdir.mkdir(parents=True)
+        backup_file = subdir / "backup.sql.zst"
+        with (
+            backup_file.open("wb") as raw_handle,
+            zstd.open(raw_handle, "w") as compressed,
+            io.TextIOWrapper(compressed, encoding="utf-8") as handle,
+        ):
+            handle.write("-- Test\n")
+
+        response: _MonkeyPatchedWSGIResponse = client.get(
+            reverse("twitch:dataset_backup_download", args=["2026/02/backup.sql.zst"]),
+        )
+
+        assert response.status_code == 200
+        assert "attachment" in response["Content-Disposition"]