Normalize Twitch box art URLs for higher quality and implement download command

This commit is contained in:
Joakim Hellsén 2026-02-11 23:49:58 +01:00
commit f4925b8e45
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
9 changed files with 222 additions and 10 deletions

View file

@ -8,11 +8,14 @@ from datetime import datetime
from pathlib import Path
from typing import Any
from typing import Literal
from urllib.parse import urlparse
import httpx
import json_repair
from colorama import Fore
from colorama import Style
from colorama import init as colorama_init
from django.core.files.base import ContentFile
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.core.management.base import CommandParser
@ -40,6 +43,8 @@ from twitch.schemas import GraphQLResponse
from twitch.schemas import OrganizationSchema
from twitch.schemas import RewardCampaign as RewardCampaignSchema
from twitch.schemas import TimeBasedDropSchema
from twitch.utils import is_twitch_box_art_url
from twitch.utils import normalize_twitch_box_art_url
from twitch.utils import parse_date
@ -642,6 +647,7 @@ class Command(BaseCommand):
update_fields.append("box_art")
if update_fields:
game_obj.save(update_fields=update_fields)
self._download_game_box_art(game_obj, game_data.box_art_url or game_obj.box_art)
return game_obj
game_obj, created = Game.objects.update_or_create(
@ -659,8 +665,34 @@ class Command(BaseCommand):
if created:
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new game: {game_data.display_name}")
self.game_cache[game_data.twitch_id] = game_obj
self._download_game_box_art(game_obj, game_obj.box_art)
return game_obj
def _download_game_box_art(self, game_obj: Game, box_art_url: str | None) -> None:
"""Download and cache Twitch box art locally when possible."""
if not box_art_url:
return
if not is_twitch_box_art_url(box_art_url):
return
if game_obj.box_art_file and getattr(game_obj.box_art_file, "name", ""):
return
normalized_url: str = normalize_twitch_box_art_url(box_art_url)
parsed_url = urlparse(normalized_url)
suffix: str = Path(parsed_url.path).suffix or ".jpg"
file_name: str = f"{game_obj.twitch_id}{suffix}"
try:
response = httpx.get(normalized_url, timeout=20)
response.raise_for_status()
except httpx.HTTPError as exc:
tqdm.write(
f"{Fore.YELLOW}!{Style.RESET_ALL} Failed to download box art for {game_obj.twitch_id}: {exc}",
)
return
game_obj.box_art_file.save(file_name, ContentFile(response.content), save=True)
def _get_or_create_channel(self, channel_info: ChannelInfoSchema) -> Channel:
"""Get or create a channel from cache or database.

View file

@ -0,0 +1,101 @@
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import ParseResult
from urllib.parse import urlparse
import httpx
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.management.base import BaseCommand
from django.core.management.base import CommandParser
from twitch.models import Game
from twitch.utils import is_twitch_box_art_url
from twitch.utils import normalize_twitch_box_art_url
if TYPE_CHECKING:
from django.db.models import QuerySet
class Command(BaseCommand):
    """Download and cache Twitch game box art locally."""

    help = "Download and cache Twitch game box art locally."

    # Final-URL path suffix that this command counts as a "404 placeholder"
    # (presumably the generic image Twitch redirects to for unknown box art).
    PLACEHOLDER_PATH_SUFFIX = "/ttv-static/404_boxart.jpg"

    def add_arguments(self, parser: CommandParser) -> None:
        """Register command arguments."""
        parser.add_argument(
            "--limit",
            type=int,
            default=None,
            help="Limit the number of games to process.",
        )
        parser.add_argument(
            "--force",
            action="store_true",
            help="Re-download even if a local box art file already exists.",
        )

    def handle(self, *_args: object, **options: object) -> None:
        """Download Twitch box art images for all games.

        Games are visited in ``twitch_id`` order. A game is skipped when it has
        no box art URL, when the URL is not a Twitch box art URL, or when a
        local file already exists and ``--force`` was not given. Download
        errors are reported as warnings and counted; they never abort the run.

        Args:
            *_args: Unused positional arguments.
            **options: Parsed command options; ``limit`` and ``force`` are read.
        """
        limit_value: object | None = options.get("limit")
        limit: int | None = limit_value if isinstance(limit_value, int) else None
        force: bool = bool(options.get("force"))

        queryset: QuerySet[Game] = Game.objects.all().order_by("twitch_id")
        if limit:
            queryset = queryset[:limit]

        total: int = queryset.count()
        downloaded: int = 0
        skipped: int = 0
        failed: int = 0
        placeholders_404: int = 0

        # One shared client so connections are reused across all downloads.
        with httpx.Client(timeout=20, follow_redirects=True) as client:
            for game in queryset:
                if not game.box_art or not is_twitch_box_art_url(game.box_art):
                    skipped += 1
                    continue

                has_local_file: bool = bool(
                    game.box_art_file and getattr(game.box_art_file, "name", ""),
                )
                if has_local_file and not force:
                    skipped += 1
                    continue

                normalized_url: str = normalize_twitch_box_art_url(game.box_art)
                parsed_url: ParseResult = urlparse(normalized_url)
                # Keep the remote extension when present, otherwise assume JPEG.
                suffix: str = Path(parsed_url.path).suffix or ".jpg"
                file_name: str = f"{game.twitch_id}{suffix}"

                try:
                    response: httpx.Response = client.get(normalized_url)
                    response.raise_for_status()
                except httpx.HTTPError as exc:
                    failed += 1
                    self.stdout.write(
                        self.style.WARNING(
                            f"Failed to download box art for {game.twitch_id}: {exc}",
                        ),
                    )
                    continue

                # Redirects are followed, so inspect the final URL: a
                # placeholder is counted both as a placeholder and as skipped.
                if response.url.path.endswith(self.PLACEHOLDER_PATH_SUFFIX):
                    placeholders_404 += 1
                    skipped += 1
                    continue

                if has_local_file:
                    # Forced re-download: remove the previous file first.
                    # Otherwise the storage backend would pick an alternative
                    # name for the new file and leave the old one orphaned.
                    # Deleted only after a successful download so a failed
                    # request never destroys the existing copy.
                    game.box_art_file.delete(save=False)

                game.box_art_file.save(file_name, ContentFile(response.content), save=True)
                downloaded += 1

        self.stdout.write(
            self.style.SUCCESS(
                f"Processed {total} games. Downloaded: {downloaded}, skipped: {skipped}, "
                f"404 placeholders: {placeholders_404}, failed: {failed}.",
            ),
        )

        # Only announce the target directory when something was actually written.
        if downloaded:
            box_art_dir: Path = Path(settings.MEDIA_ROOT) / "games" / "box_art"
            self.stdout.write(self.style.SUCCESS(f"Saved box art to: {box_art_dir}"))