diff --git a/core/views.py b/core/views.py index 352109d..6d1d9cf 100644 --- a/core/views.py +++ b/core/views.py @@ -645,8 +645,8 @@ def dataset_backups_view(request: HttpRequest) -> HttpResponse: datasets.sort(key=operator.itemgetter("updated_at"), reverse=True) seo_context: dict[str, Any] = _build_seo_context( - page_title="Twitch/Kick drop data", - page_description="Twitch/Kick datasets available for download, including historical drop campaign data and more.", + page_title="Twitch Dataset", + page_description="Database backups and datasets available for download.", ) context: dict[str, Any] = { "datasets": datasets, diff --git a/twitch/management/commands/backup_db.py b/twitch/management/commands/backup_db.py index b1b380d..c708cbb 100644 --- a/twitch/management/commands/backup_db.py +++ b/twitch/management/commands/backup_db.py @@ -1,6 +1,4 @@ -import csv import io -import json import os import shutil import subprocess # noqa: S404 @@ -21,9 +19,9 @@ if TYPE_CHECKING: class Command(BaseCommand): - """Create a compressed SQL dump of the Twitch and Kick dataset tables.""" + """Create a compressed SQL dump of the Twitch dataset tables.""" - help = "Create a compressed SQL dump of the Twitch and Kick dataset tables." + help = "Create a compressed SQL dump of the Twitch dataset tables." def add_arguments(self, parser: ArgumentParser) -> None: """Define arguments for the backup command.""" @@ -61,14 +59,9 @@ class Command(BaseCommand): timestamp: str = timezone.localtime(timezone.now()).strftime("%Y%m%d-%H%M%S") output_path: Path = output_dir / f"{prefix}-{timestamp}.sql.zst" - allowed_tables = sorted({ - *_get_allowed_tables("twitch_"), - *_get_allowed_tables("kick_"), - }) + allowed_tables = _get_allowed_tables("twitch_") if not allowed_tables: - self.stdout.write( - self.style.WARNING("No twitch or kick tables found to back up."), - ) + self.stdout.write(self.style.WARNING("No twitch tables found to back up.")) return if django_connection.vendor == "postgresql": @@ -84,16 +77,6 @@ class Command(BaseCommand): msg = f"Unsupported database backend: {django_connection.vendor}" raise CommandError(msg) - json_path: Path = output_dir / f"{prefix}-{timestamp}.json.zst" - _write_json_dump(json_path, allowed_tables) - - csv_paths: list[Path] = _write_csv_dumps( - output_dir, - prefix, - timestamp, - allowed_tables, - ) - created_at: datetime = datetime.fromtimestamp( output_path.stat().st_mtime, tz=timezone.get_current_timezone(), @@ -103,10 +86,6 @@ class Command(BaseCommand): f"Backup created: {output_path} (updated {created_at.isoformat()})", ), ) - self.stdout.write(self.style.SUCCESS(f"JSON backup created: {json_path}")) - self.stdout.write( - self.style.SUCCESS(f"CSV backups created: {len(csv_paths)} files"), - ) self.stdout.write(self.style.SUCCESS(f"Included tables: {len(allowed_tables)}")) @@ -314,77 +293,3 @@ def _sql_literal(value: object) -> str: if isinstance(value, bytes): return "X'" + value.hex() + "'" return "'" + str(value).replace("'", "''") + "'" - - -def _json_default(value: object) -> str: - """Convert non-serializable values to JSON-compatible strings. - - Args: - value: Value to convert. - - Returns: - String representation. - """ - if isinstance(value, bytes): - return value.hex() - return str(value) - - -def _write_json_dump(output_path: Path, tables: list[str]) -> None: - """Write a JSON dump of all tables into a zstd-compressed file. - - Args: - output_path: Destination path for the zstd file. - tables: Table names to include. - """ - data: dict[str, list[dict]] = {} - with django_connection.cursor() as cursor: - for table in tables: - cursor.execute(f'SELECT * FROM "{table}"') # noqa: S608 - columns: list[str] = [col[0] for col in cursor.description] - rows = cursor.fetchall() - data[table] = [dict(zip(columns, row, strict=False)) for row in rows] - - with ( - output_path.open("wb") as raw_handle, - zstd.open(raw_handle, "w") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - json.dump(data, handle, default=_json_default) - - -def _write_csv_dumps( - output_dir: Path, - prefix: str, - timestamp: str, - tables: list[str], -) -> list[Path]: - """Write per-table CSV files into zstd-compressed files. - - Args: - output_dir: Directory where CSV files will be written. - prefix: Filename prefix. - timestamp: Timestamp string for filenames. - tables: Table names to include. - - Returns: - List of created file paths. - """ - paths: list[Path] = [] - with django_connection.cursor() as cursor: - for table in tables: - cursor.execute(f'SELECT * FROM "{table}"') # noqa: S608 - columns: list[str] = [col[0] for col in cursor.description] - rows: list[tuple] = cursor.fetchall() - - output_path: Path = output_dir / f"{prefix}-{timestamp}-{table}.csv.zst" - with ( - output_path.open("wb") as raw_handle, - zstd.open(raw_handle, "w") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - writer: csv.Writer = csv.writer(handle) - writer.writerow(columns) - writer.writerows(rows) - paths.append(output_path) - return paths diff --git a/twitch/tests/test_backup.py b/twitch/tests/test_backup.py index 40e88ca..08d8c4f 100644 --- a/twitch/tests/test_backup.py +++ b/twitch/tests/test_backup.py @@ -1,11 +1,8 @@ -import csv import io -import json import math import os import shutil from compression import zstd -from datetime import datetime as dt from typing import TYPE_CHECKING import pytest @@ -15,18 +12,13 @@ from django.db import connection from django.urls import reverse from twitch.management.commands.backup_db import _get_allowed_tables -from twitch.management.commands.backup_db import _json_default from twitch.management.commands.backup_db import _sql_literal -from twitch.management.commands.backup_db import _write_csv_dumps -from twitch.management.commands.backup_db import _write_json_dump from twitch.management.commands.backup_db import _write_postgres_dump from twitch.management.commands.backup_db import _write_sqlite_dump from twitch.models import Game from twitch.models import Organization if TYPE_CHECKING: - from csv import Reader - from datetime import datetime from pathlib import Path from django.test import Client @@ -92,39 +84,34 @@ class TestBackupCommand: assert "twitch_game" in content assert "Test Org" in content - def test_backup_excludes_non_app_tables(self, tmp_path: Path) -> None: - """Test that backup includes app tables and excludes non-app tables.""" + def test_backup_excludes_non_twitch_tables(self, tmp_path: Path) -> None: + """Test that backup only includes twitch_ prefixed tables.""" _skip_if_pg_dump_missing() # Create test data so tables exist Organization.objects.create(twitch_id="test001", name="Test Org") - output_dir: Path = tmp_path / "backups" + output_dir = tmp_path / "backups" output_dir.mkdir() call_command("backup_db", output_dir=str(output_dir), prefix="test") - backup_file: Path = next(iter(output_dir.glob("test-*.sql.zst"))) + backup_file = next(iter(output_dir.glob("test-*.sql.zst"))) with ( backup_file.open("rb") as raw_handle, zstd.open(raw_handle, "r") as compressed, io.TextIOWrapper(compressed, encoding="utf-8") as handle, ): - content: str = handle.read() + content = handle.read() # Should NOT contain django admin, silk, or debug toolbar tables assert "django_session" not in content - assert "django_migrations" not in content - assert "django_content_type" not in content assert "silk_" not in content assert "debug_toolbar_" not in content assert "django_admin_log" not in content - assert "auth_" not in content - assert "youtube_" not in content - # Should contain twitch and kick tables + # Should contain twitch tables assert "twitch_" in content - assert "kick_" in content def test_backup_with_custom_prefix(self, tmp_path: Path) -> None: """Test that custom prefix is used in filename.""" @@ -172,59 +159,6 @@ class TestBackupCommand: backup_files = list(datasets_dir.glob("ttvdrops-*.sql.zst")) assert len(backup_files) >= 1 - def test_backup_creates_json_file(self, tmp_path: Path) -> None: - """Test that backup command creates a JSON file alongside the SQL dump.""" - _skip_if_pg_dump_missing() - Organization.objects.create(twitch_id="test_json", name="Test Org JSON") - - output_dir: Path = tmp_path / "backups" - output_dir.mkdir() - - call_command("backup_db", output_dir=str(output_dir), prefix="test") - - json_files: list[Path] = list(output_dir.glob("test-*.json.zst")) - assert len(json_files) == 1 - - with ( - json_files[0].open("rb") as raw_handle, - zstd.open(raw_handle, "r") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - data = json.load(handle) - - assert isinstance(data, dict) - assert "twitch_organization" in data - assert any( - row.get("name") == "Test Org JSON" for row in data["twitch_organization"] - ) - - def test_backup_creates_csv_files(self, tmp_path: Path) -> None: - """Test that backup command creates per-table CSV files alongside the SQL dump.""" - _skip_if_pg_dump_missing() - Organization.objects.create(twitch_id="test_csv", name="Test Org CSV") - - output_dir: Path = tmp_path / "backups" - output_dir.mkdir() - - call_command("backup_db", output_dir=str(output_dir), prefix="test") - - org_csv_files: list[Path] = list( - output_dir.glob("test-*-twitch_organization.csv.zst"), - ) - assert len(org_csv_files) == 1 - - with ( - org_csv_files[0].open("rb") as raw_handle, - zstd.open(raw_handle, "r") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - reader: Reader = csv.reader(handle) - rows: list[list[str]] = list(reader) - - assert len(rows) >= 2 # header + at least one data row - assert "name" in rows[0] - assert any("Test Org CSV" in row for row in rows[1:]) - @pytest.mark.django_db class TestBackupHelperFunctions: @@ -311,71 +245,6 @@ class TestBackupHelperFunctions: assert "INSERT INTO" in content assert "Write Test Org" in content - def test_write_json_dump_creates_valid_json(self, tmp_path: Path) -> None: - """Test _write_json_dump creates valid compressed JSON with all tables.""" - Organization.objects.create( - twitch_id="test_json_helper", - name="JSON Helper Org", - ) - - tables: list[str] = _get_allowed_tables("twitch_") - output_path: Path = tmp_path / "backup.json.zst" - _write_json_dump(output_path, tables) - - with ( - output_path.open("rb") as raw_handle, - zstd.open(raw_handle, "r") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - data = json.load(handle) - - assert isinstance(data, dict) - assert "twitch_organization" in data - assert all(table in data for table in tables) - assert any( - row.get("name") == "JSON Helper Org" for row in data["twitch_organization"] - ) - - def test_write_csv_dumps_creates_per_table_files(self, tmp_path: Path) -> None: - """Test _write_csv_dumps creates one compressed CSV file per table.""" - Organization.objects.create(twitch_id="test_csv_helper", name="CSV Helper Org") - - tables: list[str] = _get_allowed_tables("twitch_") - paths: list[Path] = _write_csv_dumps( - tmp_path, - "test", - "20260317-120000", - tables, - ) - - assert len(paths) == len(tables) - assert all(p.exists() for p in paths) - - org_csv: Path = tmp_path / "test-20260317-120000-twitch_organization.csv.zst" - assert org_csv.exists() - - with ( - org_csv.open("rb") as raw_handle, - zstd.open(raw_handle, "r") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - reader: Reader = csv.reader(handle) - rows: list[list[str]] = list(reader) - - assert len(rows) >= 2 # header + at least one data row - assert "name" in rows[0] - assert any("CSV Helper Org" in row for row in rows[1:]) - - def test_json_default_handles_bytes(self) -> None: - """Test _json_default converts bytes to hex string.""" - assert _json_default(b"\x00\x01") == "0001" - assert _json_default(b"hello") == "68656c6c6f" - - def test_json_default_handles_other_types(self) -> None: - """Test _json_default falls back to str() for other types.""" - value: datetime = dt(2026, 3, 17, 12, 0, 0, tzinfo=dt.now().astimezone().tzinfo) - assert _json_default(value) == str(value) - @pytest.mark.django_db class TestDatasetBackupViews: