Improve sitemaps
All checks were successful
Deploy to Server / deploy (push) Successful in 9s

This commit is contained in:
Joakim Hellsén 2026-02-27 06:02:30 +01:00
commit 415dd12fd9
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
16 changed files with 843 additions and 379 deletions

View file

@@ -5,6 +5,7 @@ import math
import os
import shutil
from compression import zstd
from pathlib import Path
from typing import TYPE_CHECKING
import pytest
@@ -21,6 +22,7 @@ from twitch.models import Game
from twitch.models import Organization
if TYPE_CHECKING:
import sqlite3
from pathlib import Path
from django.test import Client
@@ -42,12 +44,12 @@ class TestBackupCommand:
# Create test data so tables exist
Organization.objects.create(twitch_id="test000", name="Test Org")
output_dir = tmp_path / "backups"
output_dir: Path = tmp_path / "backups"
output_dir.mkdir()
call_command("backup_db", output_dir=str(output_dir), prefix="test")
backup_files = list(output_dir.glob("test-*.sql.zst"))
backup_files: list[Path] = list(output_dir.glob("test-*.sql.zst"))
assert len(backup_files) == 1
assert backup_files[0].exists()
assert backup_files[0].stat().st_size > 0
@@ -55,17 +57,17 @@ class TestBackupCommand:
def test_backup_contains_sql_content(self, tmp_path: Path) -> None:
"""Test that backup file contains valid SQL content."""
_skip_if_pg_dump_missing()
output_dir = tmp_path / "backups"
output_dir: Path = tmp_path / "backups"
output_dir.mkdir()
# Create some test data
org = Organization.objects.create(twitch_id="test123", name="Test Org")
game = Game.objects.create(twitch_id="game456", display_name="Test Game")
org: Organization = Organization.objects.create(twitch_id="test123", name="Test Org")
game: Game = Game.objects.create(twitch_id="game456", display_name="Test Game")
game.owners.add(org)
call_command("backup_db", output_dir=str(output_dir), prefix="test")
backup_file = next(iter(output_dir.glob("test-*.sql.zst")))
backup_file: Path = next(iter(output_dir.glob("test-*.sql.zst")))
# Decompress and read content
with (
@@ -73,7 +75,7 @@ class TestBackupCommand:
zstd.open(raw_handle, "r") as compressed,
io.TextIOWrapper(compressed, encoding="utf-8") as handle,
):
content = handle.read()
content: str = handle.read()
if connection.vendor == "postgresql":
assert "CREATE TABLE" in content
@@ -92,19 +94,19 @@ class TestBackupCommand:
# Create test data so tables exist
Organization.objects.create(twitch_id="test001", name="Test Org")
output_dir = tmp_path / "backups"
output_dir: Path = tmp_path / "backups"
output_dir.mkdir()
call_command("backup_db", output_dir=str(output_dir), prefix="test")
backup_file = next(iter(output_dir.glob("test-*.sql.zst")))
backup_file: Path = next(iter(output_dir.glob("test-*.sql.zst")))
with (
backup_file.open("rb") as raw_handle,
zstd.open(raw_handle, "r") as compressed,
io.TextIOWrapper(compressed, encoding="utf-8") as handle,
):
content = handle.read()
content: str = handle.read()
# Should NOT contain django admin, silk, or debug toolbar tables
assert "django_session" not in content
@@ -121,12 +123,12 @@ class TestBackupCommand:
# Create test data so tables exist
Organization.objects.create(twitch_id="test002", name="Test Org")
output_dir = tmp_path / "backups"
output_dir: Path = tmp_path / "backups"
output_dir.mkdir()
call_command("backup_db", output_dir=str(output_dir), prefix="custom")
backup_files = list(output_dir.glob("custom-*.sql.zst"))
backup_files: list[Path] = list(output_dir.glob("custom-*.sql.zst"))
assert len(backup_files) == 1
def test_backup_creates_output_directory(self, tmp_path: Path) -> None:
@@ -135,7 +137,7 @@ class TestBackupCommand:
# Create test data so tables exist
Organization.objects.create(twitch_id="test003", name="Test Org")
output_dir = tmp_path / "nonexistent" / "backups"
output_dir: Path = tmp_path / "nonexistent" / "backups"
call_command("backup_db", output_dir=str(output_dir), prefix="test")
@@ -149,12 +151,12 @@ class TestBackupCommand:
Organization.objects.create(twitch_id="test004", name="Test Org")
monkeypatch.setattr(settings, "DATA_DIR", tmp_path)
datasets_dir = tmp_path / "datasets"
datasets_dir: Path = tmp_path / "datasets"
datasets_dir.mkdir(exist_ok=True, parents=True)
call_command("backup_db")
backup_files = list(datasets_dir.glob("ttvdrops-*.sql.zst"))
backup_files: list[Path] = list(datasets_dir.glob("ttvdrops-*.sql.zst"))
assert len(backup_files) >= 1
@@ -165,7 +167,7 @@ class TestBackupHelperFunctions:
def test_get_allowed_tables_filters_by_prefix(self) -> None:
"""Test that _get_allowed_tables returns only matching tables."""
# Use Django's connection to access the test database
tables = _get_allowed_tables("twitch_")
tables: list[str] = _get_allowed_tables("twitch_")
assert len(tables) > 0
assert all(table.startswith("twitch_") for table in tables)
@@ -175,7 +177,7 @@ class TestBackupHelperFunctions:
def test_get_allowed_tables_excludes_non_matching(self) -> None:
"""Test that _get_allowed_tables excludes non-matching tables."""
# Use Django's connection to access the test database
tables = _get_allowed_tables("twitch_")
tables: list[str] = _get_allowed_tables("twitch_")
# Should not include django, silk, or debug toolbar tables
assert not any(table.startswith("django_") for table in tables)
@@ -212,25 +214,25 @@ class TestBackupHelperFunctions:
# Create test data
Organization.objects.create(twitch_id="test789", name="Write Test Org")
tables = _get_allowed_tables("twitch_")
tables: list[str] = _get_allowed_tables("twitch_")
if connection.vendor == "postgresql":
if not shutil.which("pg_dump"):
pytest.skip("pg_dump is not available")
output_path = tmp_path / "backup.sql.zst"
output_path: Path = tmp_path / "backup.sql.zst"
_write_postgres_dump(output_path, tables)
with (
output_path.open("rb") as raw_handle,
zstd.open(raw_handle, "r") as compressed,
io.TextIOWrapper(compressed, encoding="utf-8") as handle,
):
content = handle.read()
content: str = handle.read()
assert "CREATE TABLE" in content
assert "INSERT INTO" in content
assert "twitch_organization" in content
assert "Write Test Org" in content
else:
db_connection = connection.connection
db_connection: sqlite3.Connection = connection.connection
output = io.StringIO()
_write_sqlite_dump(output, db_connection, tables)
content = output.getvalue()
@@ -255,7 +257,7 @@ class TestDatasetBackupViews:
Returns:
Path to the created datasets directory.
"""
datasets_dir = tmp_path / "datasets"
datasets_dir: Path = tmp_path / "datasets"
datasets_dir.mkdir()
return datasets_dir
@@ -266,7 +268,7 @@ class TestDatasetBackupViews:
Returns:
Path to the created backup file.
"""
backup_file = datasets_dir / "ttvdrops-20260210-120000.sql.zst"
backup_file: Path = datasets_dir / "ttvdrops-20260210-120000.sql.zst"
with (
backup_file.open("wb") as raw_handle,
zstd.open(raw_handle, "w") as compressed,
@@ -315,8 +317,8 @@ class TestDatasetBackupViews:
monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
# Create multiple backup files with different timestamps
older_backup = datasets_dir / "ttvdrops-20260210-100000.sql.zst"
newer_backup = datasets_dir / "ttvdrops-20260210-140000.sql.zst"
older_backup: Path = datasets_dir / "ttvdrops-20260210-100000.sql.zst"
newer_backup: Path = datasets_dir / "ttvdrops-20260210-140000.sql.zst"
for backup in [older_backup, newer_backup]:
with (
@@ -334,9 +336,9 @@ class TestDatasetBackupViews:
response: _MonkeyPatchedWSGIResponse = client.get(reverse("twitch:dataset_backups"))
content = response.content.decode()
newer_pos = content.find("20260210-140000")
older_pos = content.find("20260210-100000")
content: str = response.content.decode()
newer_pos: int = content.find("20260210-140000")
older_pos: int = content.find("20260210-100000")
# Newer backup should appear first (sorted descending)
assert 0 < newer_pos < older_pos
@@ -370,7 +372,9 @@ class TestDatasetBackupViews:
monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
# Attempt path traversal
response = client.get(reverse("twitch:dataset_backup_download", args=["../../../etc/passwd"]))
response: _MonkeyPatchedWSGIResponse = client.get(
reverse("twitch:dataset_backup_download", args=["../../../etc/passwd"]),
)
assert response.status_code == 404
def test_dataset_download_rejects_invalid_extensions(
@@ -383,10 +387,12 @@ class TestDatasetBackupViews:
monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
# Create a file with invalid extension
invalid_file = datasets_dir / "malicious.exe"
invalid_file.write_text("not a backup")
invalid_file: Path = datasets_dir / "malicious.exe"
invalid_file.write_text("not a backup", encoding="utf-8")
response = client.get(reverse("twitch:dataset_backup_download", args=["malicious.exe"]))
response: _MonkeyPatchedWSGIResponse = client.get(
reverse("twitch:dataset_backup_download", args=["malicious.exe"]),
)
assert response.status_code == 404
def test_dataset_download_file_not_found(
@@ -398,7 +404,9 @@ class TestDatasetBackupViews:
"""Test download returns 404 for non-existent file."""
monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
response = client.get(reverse("twitch:dataset_backup_download", args=["nonexistent.sql.zst"]))
response: _MonkeyPatchedWSGIResponse = client.get(
reverse("twitch:dataset_backup_download", args=["nonexistent.sql.zst"]),
)
assert response.status_code == 404
def test_dataset_list_view_shows_file_sizes(
@@ -414,8 +422,9 @@ class TestDatasetBackupViews:
response: _MonkeyPatchedWSGIResponse = client.get(reverse("twitch:dataset_backups"))
assert response.status_code == 200
# Should contain size information (bytes, KB, MB, or GB)
content = response.content.decode()
content: str = response.content.decode()
assert any(unit in content for unit in ["bytes", "KB", "MB", "GB"])
def test_dataset_list_ignores_non_zst_files(
@@ -434,7 +443,7 @@ class TestDatasetBackupViews:
response: _MonkeyPatchedWSGIResponse = client.get(reverse("twitch:dataset_backups"))
content = response.content.decode()
content: str = response.content.decode()
assert "backup.sql.zst" in content
assert "readme.txt" not in content
assert "old_backup.gz" not in content
@@ -449,7 +458,7 @@ class TestDatasetBackupViews:
monkeypatch.setattr(settings, "DATA_DIR", datasets_dir.parent)
# Create subdirectory with backup
subdir = datasets_dir / "2026" / "02"
subdir: Path = datasets_dir / "2026" / "02"
subdir.mkdir(parents=True)
backup_file = subdir / "backup.sql.zst"
with (