Normalize Twitch box art URLs for higher quality and implement download command

This commit is contained in:
Joakim Hellsén 2026-02-11 23:49:58 +01:00
commit f4925b8e45
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
9 changed files with 222 additions and 10 deletions

View file

@ -22,11 +22,11 @@
title="RSS feed for all campaigns">RSS feed for all campaigns</a>
</div>
<!-- Game image -->
{% if game.box_art %}
{% if game.box_art_best_url %}
<img id="game-image"
height="160"
width="160"
src="{{ game.box_art }}"
src="{{ game.box_art_best_url }}"
alt="{{ game.name }}" />
{% endif %}
<!-- Game owner -->

View file

@ -411,7 +411,7 @@ class GameFeed(Feed):
slug: str = getattr(item, "slug", "")
name: str = getattr(item, "name", "")
display_name: str = getattr(item, "display_name", "")
box_art: str | None = getattr(item, "box_art", None)
box_art: str = item.box_art_best_url
owner: Organization | None = getattr(item, "owner", None)
description_parts: list[SafeText] = []
@ -474,7 +474,7 @@ class GameFeed(Feed):
def item_enclosure_url(self, item: Game) -> str:
    """Return the URL of the game's box art for use as the feed enclosure.

    Args:
        item: The game whose box art is exposed as the enclosure.

    Returns:
        ``item.box_art_best_url`` when it is truthy, otherwise an empty string.
    """
    # The former ``getattr(item, "box_art", None)`` assignment was overwritten
    # immediately afterwards, so only the ``box_art_best_url`` lookup remains.
    return item.box_art_best_url or ""

View file

@ -8,11 +8,14 @@ from datetime import datetime
from pathlib import Path
from typing import Any
from typing import Literal
from urllib.parse import urlparse
import httpx
import json_repair
from colorama import Fore
from colorama import Style
from colorama import init as colorama_init
from django.core.files.base import ContentFile
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.core.management.base import CommandParser
@ -40,6 +43,8 @@ from twitch.schemas import GraphQLResponse
from twitch.schemas import OrganizationSchema
from twitch.schemas import RewardCampaign as RewardCampaignSchema
from twitch.schemas import TimeBasedDropSchema
from twitch.utils import is_twitch_box_art_url
from twitch.utils import normalize_twitch_box_art_url
from twitch.utils import parse_date
@ -642,6 +647,7 @@ class Command(BaseCommand):
update_fields.append("box_art")
if update_fields:
game_obj.save(update_fields=update_fields)
self._download_game_box_art(game_obj, game_data.box_art_url or game_obj.box_art)
return game_obj
game_obj, created = Game.objects.update_or_create(
@ -659,8 +665,34 @@ class Command(BaseCommand):
if created:
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new game: {game_data.display_name}")
self.game_cache[game_data.twitch_id] = game_obj
self._download_game_box_art(game_obj, game_obj.box_art)
return game_obj
def _download_game_box_art(self, game_obj: Game, box_art_url: str | None) -> None:
    """Download and cache Twitch box art locally when possible.

    Nothing is downloaded when no URL is given, when the URL is not a Twitch
    box art URL, or when the game already has a stored box art file. Download
    errors are reported through ``tqdm.write`` and otherwise ignored.

    Args:
        game_obj: The game whose ``box_art_file`` should be populated.
        box_art_url: The remote box art URL, or None/empty when unknown.
    """
    if not box_art_url or not is_twitch_box_art_url(box_art_url):
        return
    if game_obj.box_art_file and getattr(game_obj.box_art_file, "name", ""):
        return

    normalized_url: str = normalize_twitch_box_art_url(box_art_url)
    # Reuse the remote file extension; fall back to .jpg when the path has none.
    suffix: str = Path(urlparse(normalized_url).path).suffix or ".jpg"
    file_name: str = f"{game_obj.twitch_id}{suffix}"

    try:
        # httpx does not follow redirects unless asked to, and raise_for_status()
        # raises for a 3xx response, so redirects must be followed explicitly
        # for a redirected image not to be logged as a failure.
        response = httpx.get(normalized_url, timeout=20, follow_redirects=True)
        response.raise_for_status()
    except httpx.HTTPError as exc:
        tqdm.write(
            f"{Fore.YELLOW}!{Style.RESET_ALL} Failed to download box art for {game_obj.twitch_id}: {exc}",
        )
        return

    # Mirror the bulk download command: a response whose final URL is the
    # 404 placeholder image is not real box art and must not be cached.
    if response.url.path.endswith("/ttv-static/404_boxart.jpg"):
        return

    game_obj.box_art_file.save(file_name, ContentFile(response.content), save=True)
def _get_or_create_channel(self, channel_info: ChannelInfoSchema) -> Channel:
"""Get or create a channel from cache or database.

View file

@ -0,0 +1,101 @@
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import ParseResult
from urllib.parse import urlparse
import httpx
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.management.base import BaseCommand
from django.core.management.base import CommandParser
from twitch.models import Game
from twitch.utils import is_twitch_box_art_url
from twitch.utils import normalize_twitch_box_art_url
if TYPE_CHECKING:
from django.db.models import QuerySet
class Command(BaseCommand):
    """Download and cache Twitch game box art locally."""

    help = "Download and cache Twitch game box art locally."

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the ``--limit`` and ``--force`` command-line options."""
        parser.add_argument(
            "--limit",
            type=int,
            default=None,
            help="Limit the number of games to process.",
        )
        parser.add_argument(
            "--force",
            action="store_true",
            help="Re-download even if a local box art file already exists.",
        )

    def handle(self, *_args: object, **options: object) -> None:
        """Download Twitch box art images for all games.

        Games are visited in ``twitch_id`` order. A game is skipped when it has
        no box art URL, when the URL is not a Twitch box art URL, when it
        already has a local file (unless ``--force`` is given), or when the
        download resolves to Twitch's 404 placeholder image. A summary line
        with the counters is written at the end.

        Args:
            *_args: Unused positional arguments.
            **options: Parsed command options; ``limit`` and ``force`` are read.
        """
        limit_value: object | None = options.get("limit")
        limit: int | None = limit_value if isinstance(limit_value, int) else None
        force: bool = bool(options.get("force"))

        queryset: QuerySet[Game] = Game.objects.all().order_by("twitch_id")
        if limit:
            queryset = queryset[:limit]

        total: int = queryset.count()
        downloaded: int = 0
        skipped: int = 0
        failed: int = 0
        placeholders_404: int = 0

        with httpx.Client(timeout=20, follow_redirects=True) as client:
            for game in queryset:
                if not game.box_art or not is_twitch_box_art_url(game.box_art):
                    skipped += 1
                    continue

                has_local_file: bool = bool(game.box_art_file and getattr(game.box_art_file, "name", ""))
                if has_local_file and not force:
                    skipped += 1
                    continue

                normalized_url: str = normalize_twitch_box_art_url(game.box_art)
                parsed_url: ParseResult = urlparse(normalized_url)
                # Reuse the remote file extension; fall back to .jpg when the path has none.
                suffix: str = Path(parsed_url.path).suffix or ".jpg"
                file_name: str = f"{game.twitch_id}{suffix}"

                try:
                    response: httpx.Response = client.get(normalized_url)
                    response.raise_for_status()
                except httpx.HTTPError as exc:
                    failed += 1
                    self.stdout.write(
                        self.style.WARNING(
                            f"Failed to download box art for {game.twitch_id}: {exc}",
                        ),
                    )
                    continue

                # After redirects, a final URL ending in the placeholder path is
                # counted separately and never stored as real box art.
                if response.url.path.endswith("/ttv-static/404_boxart.jpg"):
                    placeholders_404 += 1
                    skipped += 1
                    continue

                if has_local_file:
                    # Only reachable with --force. With Django's default file
                    # storage, saving under a name that already exists picks a
                    # new non-colliding name, which would leave the old file
                    # orphaned on disk. Remove it first, and only now that the
                    # replacement has been downloaded successfully.
                    game.box_art_file.delete(save=False)

                game.box_art_file.save(file_name, ContentFile(response.content), save=True)
                downloaded += 1

        self.stdout.write(
            self.style.SUCCESS(
                f"Processed {total} games. Downloaded: {downloaded}, skipped: {skipped}, "
                f"404 placeholders: {placeholders_404}, failed: {failed}.",
            ),
        )
        box_art_dir: Path = Path(settings.MEDIA_ROOT) / "games" / "box_art"
        self.stdout.write(self.style.SUCCESS(f"Saved box art to: {box_art_dir}"))

View file

@ -11,6 +11,8 @@ from django.utils import timezone
from django.utils.html import format_html
from django.utils.safestring import SafeText
from twitch.utils import normalize_twitch_box_art_url
if TYPE_CHECKING:
import datetime
@ -187,7 +189,7 @@ class Game(auto_prefetch.Model):
return self.box_art_file.url
except (AttributeError, OSError, ValueError) as exc:
logger.debug("Failed to resolve Game.box_art_file url: %s", exc)
return self.box_art or ""
return normalize_twitch_box_art_url(self.box_art or "")
# MARK: TwitchGame

View file

@ -7,6 +7,8 @@ from pydantic import Field
from pydantic import field_validator
from pydantic import model_validator
from twitch.utils import normalize_twitch_box_art_url
class OrganizationSchema(BaseModel):
"""Schema for Twitch Organization objects."""
@ -44,6 +46,24 @@ class GameSchema(BaseModel):
"populate_by_name": True,
}
@field_validator("box_art_url", mode="before")
@classmethod
def normalize_box_art_url(cls, v: str | None) -> str | None:
    """Normalize Twitch box art URLs to higher quality variants.

    Twitch's box art URLs often include size suffixes (e.g. -120x160) that point to lower quality images.
    This validator removes those suffixes to get the original higher quality image.

    Args:
        v: The raw box_art_url value (str or None).

    Returns:
        The normalized box_art_url string, or the input unchanged when it is
        empty, None, or not a string.
    """
    # A "before" validator sees the raw input, which is not guaranteed to be a
    # string yet. Only strings are normalized; anything else is passed through
    # untouched so the field's own type validation can report it.
    if isinstance(v, str) and v:
        return normalize_twitch_box_art_url(v)
    return v
@model_validator(mode="before")
@classmethod
def normalize_display_name(cls, data: dict | object) -> dict | object:

View file

@ -4,6 +4,7 @@ from __future__ import annotations
from twitch.schemas import DropBenefitSchema
from twitch.schemas import DropCampaignSchema
from twitch.schemas import GameSchema
from twitch.schemas import GraphQLResponse
from twitch.schemas import TimeBasedDropSchema
@ -118,6 +119,20 @@ def test_inventory_operation_validation() -> None:
assert first_drop.benefit_edges[0].benefit.name == "Test Benefit"
def test_game_schema_normalizes_twitch_box_art_url() -> None:
    """Check that GameSchema drops the size suffix from a Twitch box art URL."""
    raw_game: dict[str, str] = {
        "id": "65654",
        "displayName": "Test Game",
        "boxArtURL": "https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB-120x160.jpg",
        "__typename": "Game",
    }
    expected_url: str = "https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB.jpg"

    validated: GameSchema = GameSchema.model_validate(raw_game)

    assert validated.box_art_url == expected_url
def test_viewer_drops_dashboard_operation_still_works() -> None:
"""Test that the original ViewerDropsDashboard format still validates.

View file

@ -1,7 +1,11 @@
from __future__ import annotations
import re
from functools import lru_cache
from typing import TYPE_CHECKING
from urllib.parse import ParseResult
from urllib.parse import urlparse
from urllib.parse import urlunparse
import dateparser
from django.utils import timezone
@ -10,6 +14,44 @@ if TYPE_CHECKING:
from datetime import datetime
TWITCH_BOX_ART_HOST = "static-cdn.jtvnw.net"
TWITCH_BOX_ART_PATH_PREFIX = "/ttv-boxart/"
# Matches a trailing "-<width>x<height>" size suffix (literal numbers or the
# "{width}"/"{height}" template placeholders) directly before the file extension.
TWITCH_BOX_ART_SIZE_PATTERN: re.Pattern[str] = re.compile(r"-(\{width\}|\d+)x(\{height\}|\d+)(?=\.[A-Za-z0-9]+$)")


def _parse_twitch_box_art_url(url: str) -> ParseResult | None:
    """Return the parsed URL when it points at Twitch's box art CDN, else None."""
    if not url:
        return None
    parsed: ParseResult = urlparse(url)
    # ``hostname`` is lower-cased and excludes any port or userinfo, so the
    # comparison is case-insensitive, as host names are.
    if (parsed.hostname or "") != TWITCH_BOX_ART_HOST:
        return None
    if not parsed.path.startswith(TWITCH_BOX_ART_PATH_PREFIX):
        return None
    return parsed


def is_twitch_box_art_url(url: str) -> bool:
    """Return True when the URL points at Twitch's box art CDN.

    Args:
        url: The URL to inspect; an empty value yields False.

    Returns:
        True when the host is the Twitch static CDN and the path starts with
        the box art prefix, otherwise False.
    """
    return _parse_twitch_box_art_url(url) is not None


def normalize_twitch_box_art_url(url: str) -> str:
    """Normalize Twitch box art URLs to remove size suffixes for higher quality.

    Example:
        https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB-120x160.jpg
        -> https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB.jpg

    Args:
        url: The Twitch box art URL to normalize.

    Returns:
        The normalized Twitch box art URL without size suffixes. Empty values
        and URLs that are not Twitch box art URLs are returned unchanged.
    """
    parsed: ParseResult | None = _parse_twitch_box_art_url(url)
    if parsed is None:
        return url
    normalized_path: str = TWITCH_BOX_ART_SIZE_PATTERN.sub("", parsed.path)
    # Only the path is rewritten; scheme, host, query and fragment are kept.
    return urlunparse(parsed._replace(path=normalized_path))
@lru_cache(maxsize=40 * 40 * 1024)
def parse_date(value: str) -> datetime | None:
"""Parse a datetime string into a timezone-aware datetime using dateparser.

View file

@ -1158,7 +1158,7 @@ class GameDetailView(DetailView):
game_description: str = (
f"Twitch drop campaigns for {game_name}. View active, upcoming, and completed drop rewards."
)
game_image: str | None = game.box_art
game_image: str | None = game.box_art_best_url
game_schema: dict[str, Any] = {
"@context": "https://schema.org",
@ -1167,8 +1167,8 @@ class GameDetailView(DetailView):
"description": game_description,
"url": self.request.build_absolute_uri(reverse("twitch:game_detail", args=[game.twitch_id])),
}
if game.box_art:
game_schema["image"] = game.box_art
if game.box_art_best_url:
game_schema["image"] = game.box_art_best_url
if owners:
game_schema["publisher"] = {
"@type": "Organization",
@ -2295,7 +2295,7 @@ def export_games_csv(request: HttpRequest) -> HttpResponse: # noqa: ARG001 # n
game.name,
game.display_name,
game.slug,
game.box_art,
game.box_art_best_url,
game.added_at.isoformat() if game.added_at else "",
game.updated_at.isoformat() if game.updated_at else "",
])
@ -2321,7 +2321,7 @@ def export_games_json(request: HttpRequest) -> HttpResponse: # noqa: ARG001 #
"name": game.name,
"display_name": game.display_name,
"slug": game.slug,
"box_art_url": game.box_art,
"box_art_url": game.box_art_best_url,
"added_at": game.added_at.isoformat() if game.added_at else None,
"updated_at": game.updated_at.isoformat() if game.updated_at else None,
}