diff --git a/core/views.py b/core/views.py index 352109d..601ce11 100644 --- a/core/views.py +++ b/core/views.py @@ -644,9 +644,56 @@ def dataset_backups_view(request: HttpRequest) -> HttpResponse: datasets.sort(key=operator.itemgetter("updated_at"), reverse=True) + dataset_distributions: list[dict[str, str]] = [] + for dataset in datasets: + download_path: str | None = dataset.get("download_path") + if not download_path: + continue + dataset_distributions.append({ + "@type": "DataDownload", + "name": dataset["name"], + "contentUrl": request.build_absolute_uri( + reverse("core:dataset_backup_download", args=[download_path]), + ), + "encodingFormat": "application/zstd", + }) + + dataset_schema: dict[str, Any] = { + "@context": "https://schema.org", + "@type": "Dataset", + "name": "Historical archive of Twitch and Kick drop data", + "identifier": request.build_absolute_uri(reverse("core:dataset_backups")), + "temporalCoverage": "2024-07-17/..", + "url": request.build_absolute_uri(reverse("core:dataset_backups")), + "license": "https://creativecommons.org/publicdomain/zero/1.0/", + "isAccessibleForFree": True, + "description": ( + "Historical data on Twitch and Kick drops, campaigns, rewards, and more, available for download as compressed SQL files or JSON." + ), + "keywords": [ + "Twitch drops", + "Kick drops", + ], + "creator": { + "@type": "Person", + "givenName": "Joakim", + "familyName": "Hellsén", + "name": "Joakim Hellsén", + "sameAs": "https://orcid.org/0009-0006-7305-524X", + }, + "includedInDataCatalog": { + "@type": "DataCatalog", + "name": "ttvdrops.lovinator.space", + "url": request.build_absolute_uri(reverse("core:dataset_backups")), + }, + } + if dataset_distributions: + dataset_schema["distribution"] = dataset_distributions + seo_context: dict[str, Any] = _build_seo_context( page_title="Twitch/Kick drop data", page_description="Twitch/Kick datasets available for download, including historical drop campaign data and more.", + schema_data=dataset_schema, ) context: dict[str, Any] = { "datasets": datasets, diff --git a/templates/twitch/dataset_backups.html b/templates/twitch/dataset_backups.html index 0cb4064..5837bf6 100644 --- a/templates/twitch/dataset_backups.html +++ b/templates/twitch/dataset_backups.html @@ -31,7 +31,7 @@ {% for dataset in datasets %} - + {{ dataset.name }} diff --git a/twitch/management/commands/backup_db.py b/twitch/management/commands/backup_db.py index 88c7ece..d651cfe 100644 --- a/twitch/management/commands/backup_db.py +++ b/twitch/management/commands/backup_db.py @@ -1,4 +1,3 @@ -import csv import io import json import os @@ -87,13 +86,6 @@ class Command(BaseCommand): json_path: Path = output_dir / f"{prefix}-{timestamp}.json.zst" _write_json_dump(json_path, allowed_tables) - csv_path: Path = _write_csv_dump( - output_dir, - prefix, - timestamp, - allowed_tables, - ) - created_at: datetime = datetime.fromtimestamp( output_path.stat().st_mtime, tz=timezone.get_current_timezone(), @@ -104,7 +96,6 @@ class Command(BaseCommand): ), ) self.stdout.write(self.style.SUCCESS(f"JSON backup created: {json_path}")) - self.stdout.write(self.style.SUCCESS(f"CSV backup created: {csv_path}")) self.stdout.write(self.style.SUCCESS(f"Included tables: {len(allowed_tables)}")) @@ -349,46 +340,3 @@ def _write_json_dump(output_path: Path, tables: list[str]) -> None: io.TextIOWrapper(compressed, encoding="utf-8") as handle, ): json.dump(data, handle, default=_json_default) - - -def _write_csv_dump( - output_dir: Path, - prefix: str, - timestamp: str, - tables: list[str], -) -> Path: - """Write a combined CSV file containing rows from all tables. - - Args: - output_dir: Directory where CSV files will be written. - prefix: Filename prefix. - timestamp: Timestamp string for filenames. - tables: Table names to include. - - Returns: - Created file path. - """ - output_path: Path = output_dir / f"{prefix}-{timestamp}.csv.zst" - - with ( - output_path.open("wb") as raw_handle, - zstd.open(raw_handle, "w") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - writer: csv.Writer = csv.writer(handle) - writer.writerow(["table", "row_json"]) - - with django_connection.cursor() as cursor: - for table in tables: - cursor.execute(f'SELECT * FROM "{table}"') # noqa: S608 - columns: list[str] = [col[0] for col in cursor.description] - rows: list[tuple] = cursor.fetchall() - - for row in rows: - row_dict = dict(zip(columns, row, strict=False)) - writer.writerow([ - table, - json.dumps(row_dict, default=_json_default), - ]) - - return output_path diff --git a/twitch/tests/test_backup.py b/twitch/tests/test_backup.py index 0eb84a4..1123a7a 100644 --- a/twitch/tests/test_backup.py +++ b/twitch/tests/test_backup.py @@ -1,4 +1,3 @@ -import csv import io import json import math @@ -17,7 +16,6 @@ from django.urls import reverse from twitch.management.commands.backup_db import _get_allowed_tables from twitch.management.commands.backup_db import _json_default from twitch.management.commands.backup_db import _sql_literal -from twitch.management.commands.backup_db import _write_csv_dump from twitch.management.commands.backup_db import _write_json_dump from twitch.management.commands.backup_db import _write_postgres_dump from twitch.management.commands.backup_db import _write_sqlite_dump @@ -25,7 +23,6 @@ from twitch.models import Game from twitch.models import Organization if TYPE_CHECKING: - from csv import Reader from datetime import datetime from pathlib import Path @@ -198,34 +195,6 @@ class TestBackupCommand: row.get("name") == "Test Org JSON" for row in data["twitch_organization"] ) - def test_backup_creates_single_csv_file(self, tmp_path: Path) -> None: - """Test that backup command creates a single CSV file alongside the SQL dump.""" - _skip_if_pg_dump_missing() - Organization.objects.create(twitch_id="test_csv", name="Test Org CSV") - - output_dir: Path = tmp_path / "backups" - output_dir.mkdir() - - call_command("backup_db", output_dir=str(output_dir), prefix="test") - - csv_files: list[Path] = list(output_dir.glob("test-*.csv.zst")) - assert len(csv_files) == 1 - - with ( - csv_files[0].open("rb") as raw_handle, - zstd.open(raw_handle, "r") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - reader: Reader = csv.reader(handle) - rows: list[list[str]] = list(reader) - - assert len(rows) >= 2 # header + at least one data row - assert rows[0] == ["table", "row_json"] - data_rows: list[list[str]] = [ - row for row in rows[1:] if row and row[0] == "twitch_organization" - ] - assert any("Test Org CSV" in row[1] for row in data_rows) - @pytest.mark.django_db class TestBackupHelperFunctions: @@ -337,36 +306,6 @@ class TestBackupHelperFunctions: row.get("name") == "JSON Helper Org" for row in data["twitch_organization"] ) - def test_write_csv_dump_creates_single_file(self, tmp_path: Path) -> None: - """Test _write_csv_dump creates one combined compressed CSV file.""" - Organization.objects.create(twitch_id="test_csv_helper", name="CSV Helper Org") - - tables: list[str] = _get_allowed_tables("twitch_") - path: Path = _write_csv_dump( - tmp_path, - "test", - "20260317-120000", - tables, - ) - - assert path.exists() - assert path.name == "test-20260317-120000.csv.zst" - - with ( - path.open("rb") as raw_handle, - zstd.open(raw_handle, "r") as compressed, - io.TextIOWrapper(compressed, encoding="utf-8") as handle, - ): - reader: Reader = csv.reader(handle) - rows: list[list[str]] = list(reader) - - assert len(rows) >= 2 # header + at least one data row - assert rows[0] == ["table", "row_json"] - data_rows: list[list[str]] = [ - row for row in rows[1:] if row and row[0] == "twitch_organization" - ] - assert any("CSV Helper Org" in row[1] for row in data_rows) - def test_json_default_handles_bytes(self) -> None: """Test _json_default converts bytes to hex string.""" assert _json_default(b"\x00\x01") == "0001"