From f4925b8e452ed635e28fd1ba83b968fcd1fc8c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Hells=C3=A9n?= Date: Wed, 11 Feb 2026 23:49:58 +0100 Subject: [PATCH] Normalize Twitch box art URLs for higher quality and implement download command --- templates/twitch/game_detail.html | 4 +- twitch/feeds.py | 4 +- .../commands/better_import_drops.py | 32 ++++++ .../management/commands/download_box_art.py | 101 ++++++++++++++++++ twitch/models.py | 4 +- twitch/schemas.py | 20 ++++ twitch/tests/test_schemas.py | 15 +++ twitch/utils.py | 42 ++++++++ twitch/views.py | 10 +- 9 files changed, 222 insertions(+), 10 deletions(-) create mode 100644 twitch/management/commands/download_box_art.py diff --git a/templates/twitch/game_detail.html b/templates/twitch/game_detail.html index 684658a..3703802 100644 --- a/templates/twitch/game_detail.html +++ b/templates/twitch/game_detail.html @@ -22,11 +22,11 @@ title="RSS feed for all campaigns">RSS feed for all campaigns - {% if game.box_art %} + {% if game.box_art_best_url %} {{ game.name }} {% endif %} diff --git a/twitch/feeds.py b/twitch/feeds.py index c921e4d..4588b06 100644 --- a/twitch/feeds.py +++ b/twitch/feeds.py @@ -411,7 +411,7 @@ class GameFeed(Feed): slug: str = getattr(item, "slug", "") name: str = getattr(item, "name", "") display_name: str = getattr(item, "display_name", "") - box_art: str | None = getattr(item, "box_art", None) + box_art: str = item.box_art_best_url owner: Organization | None = getattr(item, "owner", None) description_parts: list[SafeText] = [] @@ -474,7 +474,7 @@ class GameFeed(Feed): def item_enclosure_url(self, item: Game) -> str: """Returns the URL of the game's box art for enclosure.""" - box_art: str | None = getattr(item, "box_art", None) + box_art: str = item.box_art_best_url if box_art: return box_art return "" diff --git a/twitch/management/commands/better_import_drops.py b/twitch/management/commands/better_import_drops.py index ff4c518..613f982 100644 --- a/twitch/management/commands/better_import_drops.py +++ b/twitch/management/commands/better_import_drops.py @@ -8,11 +8,14 @@ from datetime import datetime from pathlib import Path from typing import Any from typing import Literal +from urllib.parse import urlparse +import httpx import json_repair from colorama import Fore from colorama import Style from colorama import init as colorama_init +from django.core.files.base import ContentFile from django.core.management.base import BaseCommand from django.core.management.base import CommandError from django.core.management.base import CommandParser @@ -40,6 +43,8 @@ from twitch.schemas import GraphQLResponse from twitch.schemas import OrganizationSchema from twitch.schemas import RewardCampaign as RewardCampaignSchema from twitch.schemas import TimeBasedDropSchema +from twitch.utils import is_twitch_box_art_url +from twitch.utils import normalize_twitch_box_art_url from twitch.utils import parse_date @@ -642,6 +647,7 @@ class Command(BaseCommand): update_fields.append("box_art") if update_fields: game_obj.save(update_fields=update_fields) + self._download_game_box_art(game_obj, game_data.box_art_url or game_obj.box_art) return game_obj game_obj, created = Game.objects.update_or_create( @@ -659,8 +665,34 @@ class Command(BaseCommand): if created: tqdm.write(f"{Fore.GREEN}✓{Style.RESET_ALL} Created new game: {game_data.display_name}") self.game_cache[game_data.twitch_id] = game_obj + self._download_game_box_art(game_obj, game_obj.box_art) return game_obj + def _download_game_box_art(self, game_obj: Game, box_art_url: str | None) -> None: + """Download and cache Twitch box art locally when possible.""" + if not box_art_url: + return + if not is_twitch_box_art_url(box_art_url): + return + if game_obj.box_art_file and getattr(game_obj.box_art_file, "name", ""): + return + + normalized_url: str = normalize_twitch_box_art_url(box_art_url) + parsed_url = urlparse(normalized_url) + suffix: str = Path(parsed_url.path).suffix or ".jpg" + file_name: str = f"{game_obj.twitch_id}{suffix}" + + try: + response = httpx.get(normalized_url, timeout=20) + response.raise_for_status() + except httpx.HTTPError as exc: + tqdm.write( + f"{Fore.YELLOW}!{Style.RESET_ALL} Failed to download box art for {game_obj.twitch_id}: {exc}", + ) + return + + game_obj.box_art_file.save(file_name, ContentFile(response.content), save=True) + def _get_or_create_channel(self, channel_info: ChannelInfoSchema) -> Channel: """Get or create a channel from cache or database. diff --git a/twitch/management/commands/download_box_art.py b/twitch/management/commands/download_box_art.py new file mode 100644 index 0000000..41b429f --- /dev/null +++ b/twitch/management/commands/download_box_art.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING +from urllib.parse import ParseResult +from urllib.parse import urlparse + +import httpx +from django.conf import settings +from django.core.files.base import ContentFile +from django.core.management.base import BaseCommand +from django.core.management.base import CommandParser + +from twitch.models import Game +from twitch.utils import is_twitch_box_art_url +from twitch.utils import normalize_twitch_box_art_url + +if TYPE_CHECKING: + from django.db.models import QuerySet + + +class Command(BaseCommand): + """Download and cache Twitch game box art locally.""" + + help = "Download and cache Twitch game box art locally." + + def add_arguments(self, parser: CommandParser) -> None: + """Register command arguments.""" + parser.add_argument( + "--limit", + type=int, + default=None, + help="Limit the number of games to process.", + ) + parser.add_argument( + "--force", + action="store_true", + help="Re-download even if a local box art file already exists.", + ) + + def handle(self, *_args: object, **options: object) -> None: + """Download Twitch box art images for all games.""" + limit_value: object | None = options.get("limit") + limit: int | None = limit_value if isinstance(limit_value, int) else None + force: bool = bool(options.get("force")) + + queryset: QuerySet[Game] = Game.objects.all().order_by("twitch_id") + if limit: + queryset = queryset[:limit] + + total: int = queryset.count() + downloaded: int = 0 + skipped: int = 0 + failed: int = 0 + placeholders_404: int = 0 + + with httpx.Client(timeout=20, follow_redirects=True) as client: + for game in queryset: + if not game.box_art: + skipped += 1 + continue + if not is_twitch_box_art_url(game.box_art): + skipped += 1 + continue + if game.box_art_file and getattr(game.box_art_file, "name", "") and not force: + skipped += 1 + continue + + normalized_url: str = normalize_twitch_box_art_url(game.box_art) + parsed_url: ParseResult = urlparse(normalized_url) + suffix: str = Path(parsed_url.path).suffix or ".jpg" + file_name: str = f"{game.twitch_id}{suffix}" + + try: + response: httpx.Response = client.get(normalized_url) + response.raise_for_status() + except httpx.HTTPError as exc: + failed += 1 + self.stdout.write( + self.style.WARNING( + f"Failed to download box art for {game.twitch_id}: {exc}", + ), + ) + continue + + if response.url.path.endswith("/ttv-static/404_boxart.jpg"): + placeholders_404 += 1 + skipped += 1 + continue + + game.box_art_file.save(file_name, ContentFile(response.content), save=True) + downloaded += 1 + + self.stdout.write( + self.style.SUCCESS( + f"Processed {total} games. Downloaded: {downloaded}, skipped: {skipped}, " + f"404 placeholders: {placeholders_404}, failed: {failed}.", + ), + ) + box_art_dir: Path = Path(settings.MEDIA_ROOT) / "games" / "box_art" + self.stdout.write(self.style.SUCCESS(f"Saved box art to: {box_art_dir}")) diff --git a/twitch/models.py b/twitch/models.py index 2349b35..8a9ca7e 100644 --- a/twitch/models.py +++ b/twitch/models.py @@ -11,6 +11,8 @@ from django.utils import timezone from django.utils.html import format_html from django.utils.safestring import SafeText +from twitch.utils import normalize_twitch_box_art_url + if TYPE_CHECKING: import datetime @@ -187,7 +189,7 @@ class Game(auto_prefetch.Model): return self.box_art_file.url except (AttributeError, OSError, ValueError) as exc: logger.debug("Failed to resolve Game.box_art_file url: %s", exc) - return self.box_art or "" + return normalize_twitch_box_art_url(self.box_art or "") # MARK: TwitchGame diff --git a/twitch/schemas.py b/twitch/schemas.py index 2339c36..d4b46da 100644 --- a/twitch/schemas.py +++ b/twitch/schemas.py @@ -7,6 +7,8 @@ from pydantic import Field from pydantic import field_validator from pydantic import model_validator +from twitch.utils import normalize_twitch_box_art_url + class OrganizationSchema(BaseModel): """Schema for Twitch Organization objects.""" @@ -44,6 +46,24 @@ class GameSchema(BaseModel): "populate_by_name": True, } + @field_validator("box_art_url", mode="before") + @classmethod + def normalize_box_art_url(cls, v: str | None) -> str | None: + """Normalize Twitch box art URLs to higher quality variants. + + Twitch's box art URLs often include size suffixes (e.g. -120x160) that point to lower quality images. + This validator removes those suffixes to get the original higher quality image. + + Args: + v: The raw box_art_url value (str or None). + + Returns: + The normalized box_art_url string, or None if input was None. + """ + if v: + return normalize_twitch_box_art_url(v) + return v + @model_validator(mode="before") @classmethod def normalize_display_name(cls, data: dict | object) -> dict | object: diff --git a/twitch/tests/test_schemas.py b/twitch/tests/test_schemas.py index f9516a0..3249db9 100644 --- a/twitch/tests/test_schemas.py +++ b/twitch/tests/test_schemas.py @@ -4,6 +4,7 @@ from __future__ import annotations from twitch.schemas import DropBenefitSchema from twitch.schemas import DropCampaignSchema +from twitch.schemas import GameSchema from twitch.schemas import GraphQLResponse from twitch.schemas import TimeBasedDropSchema @@ -118,6 +119,20 @@ def test_inventory_operation_validation() -> None: assert first_drop.benefit_edges[0].benefit.name == "Test Benefit" +def test_game_schema_normalizes_twitch_box_art_url() -> None: + """Ensure Twitch box art URLs are normalized for higher quality.""" + schema: GameSchema = GameSchema.model_validate( + { + "id": "65654", + "displayName": "Test Game", + "boxArtURL": "https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB-120x160.jpg", + "__typename": "Game", + }, + ) + + assert schema.box_art_url == "https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB.jpg" + + def test_viewer_drops_dashboard_operation_still_works() -> None: """Test that the original ViewerDropsDashboard format still validates. diff --git a/twitch/utils.py b/twitch/utils.py index d446b44..ca3da3e 100644 --- a/twitch/utils.py +++ b/twitch/utils.py @@ -1,7 +1,11 @@ from __future__ import annotations +import re from functools import lru_cache from typing import TYPE_CHECKING +from urllib.parse import ParseResult +from urllib.parse import urlparse +from urllib.parse import urlunparse import dateparser from django.utils import timezone @@ -10,6 +14,44 @@ if TYPE_CHECKING: from datetime import datetime +TWITCH_BOX_ART_HOST = "static-cdn.jtvnw.net" +TWITCH_BOX_ART_PATH_PREFIX = "/ttv-boxart/" +TWITCH_BOX_ART_SIZE_PATTERN: re.Pattern[str] = re.compile(r"-(\{width\}|\d+)x(\{height\}|\d+)(?=\.[A-Za-z0-9]+$)") + + +def is_twitch_box_art_url(url: str) -> bool: + """Return True when the URL points at Twitch's box art CDN.""" + if not url: + return False + + parsed: ParseResult = urlparse(url) + return parsed.netloc == TWITCH_BOX_ART_HOST and parsed.path.startswith(TWITCH_BOX_ART_PATH_PREFIX) + + +def normalize_twitch_box_art_url(url: str) -> str: + """Normalize Twitch box art URLs to remove size suffixes for higher quality. + + Example: + https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB-120x160.jpg + -> https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB.jpg + + Args: + url: The Twitch box art URL to normalize. + + Returns: + The normalized Twitch box art URL without size suffixes. + """ + if not url: + return url + + parsed: ParseResult = urlparse(url) + if parsed.netloc != TWITCH_BOX_ART_HOST or not parsed.path.startswith(TWITCH_BOX_ART_PATH_PREFIX): + return url + + normalized_path: str = TWITCH_BOX_ART_SIZE_PATTERN.sub("", parsed.path) + return urlunparse(parsed._replace(path=normalized_path)) + + @lru_cache(maxsize=40 * 40 * 1024) def parse_date(value: str) -> datetime | None: """Parse a datetime string into a timezone-aware datetime using dateparser. diff --git a/twitch/views.py b/twitch/views.py index ac700a8..5452f62 100644 --- a/twitch/views.py +++ b/twitch/views.py @@ -1158,7 +1158,7 @@ class GameDetailView(DetailView): game_description: str = ( f"Twitch drop campaigns for {game_name}. View active, upcoming, and completed drop rewards." ) - game_image: str | None = game.box_art + game_image: str | None = game.box_art_best_url game_schema: dict[str, Any] = { "@context": "https://schema.org", @@ -1167,8 +1167,8 @@ class GameDetailView(DetailView): "description": game_description, "url": self.request.build_absolute_uri(reverse("twitch:game_detail", args=[game.twitch_id])), } - if game.box_art: - game_schema["image"] = game.box_art + if game.box_art_best_url: + game_schema["image"] = game.box_art_best_url if owners: game_schema["publisher"] = { "@type": "Organization", @@ -2295,7 +2295,7 @@ def export_games_csv(request: HttpRequest) -> HttpResponse: # noqa: ARG001 # n game.name, game.display_name, game.slug, - game.box_art, + game.box_art_best_url, game.added_at.isoformat() if game.added_at else "", game.updated_at.isoformat() if game.updated_at else "", ]) @@ -2321,7 +2321,7 @@ def export_games_json(request: HttpRequest) -> HttpResponse: # noqa: ARG001 # "name": game.name, "display_name": game.display_name, "slug": game.slug, - "box_art_url": game.box_art, + "box_art_url": game.box_art_best_url, "added_at": game.added_at.isoformat() if game.added_at else None, "updated_at": game.updated_at.isoformat() if game.updated_at else None, }