Normalize Twitch box art URLs for higher quality and implement download command
This commit is contained in:
parent
bbeed5c9f0
commit
f4925b8e45
9 changed files with 222 additions and 10 deletions
|
|
@ -411,7 +411,7 @@ class GameFeed(Feed):
|
|||
slug: str = getattr(item, "slug", "")
|
||||
name: str = getattr(item, "name", "")
|
||||
display_name: str = getattr(item, "display_name", "")
|
||||
box_art: str | None = getattr(item, "box_art", None)
|
||||
box_art: str = item.box_art_best_url
|
||||
owner: Organization | None = getattr(item, "owner", None)
|
||||
|
||||
description_parts: list[SafeText] = []
|
||||
|
|
@ -474,7 +474,7 @@ class GameFeed(Feed):
|
|||
|
||||
def item_enclosure_url(self, item: Game) -> str:
|
||||
"""Returns the URL of the game's box art for enclosure."""
|
||||
box_art: str | None = getattr(item, "box_art", None)
|
||||
box_art: str = item.box_art_best_url
|
||||
if box_art:
|
||||
return box_art
|
||||
return ""
|
||||
|
|
|
|||
|
|
@ -8,11 +8,14 @@ from datetime import datetime
|
|||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import Literal
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
import json_repair
|
||||
from colorama import Fore
|
||||
from colorama import Style
|
||||
from colorama import init as colorama_init
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import CommandError
|
||||
from django.core.management.base import CommandParser
|
||||
|
|
@ -40,6 +43,8 @@ from twitch.schemas import GraphQLResponse
|
|||
from twitch.schemas import OrganizationSchema
|
||||
from twitch.schemas import RewardCampaign as RewardCampaignSchema
|
||||
from twitch.schemas import TimeBasedDropSchema
|
||||
from twitch.utils import is_twitch_box_art_url
|
||||
from twitch.utils import normalize_twitch_box_art_url
|
||||
from twitch.utils import parse_date
|
||||
|
||||
|
||||
|
|
@ -642,6 +647,7 @@ class Command(BaseCommand):
|
|||
update_fields.append("box_art")
|
||||
if update_fields:
|
||||
game_obj.save(update_fields=update_fields)
|
||||
self._download_game_box_art(game_obj, game_data.box_art_url or game_obj.box_art)
|
||||
return game_obj
|
||||
|
||||
game_obj, created = Game.objects.update_or_create(
|
||||
|
|
@ -659,8 +665,34 @@ class Command(BaseCommand):
|
|||
if created:
|
||||
tqdm.write(f"{Fore.GREEN}✓{Style.RESET_ALL} Created new game: {game_data.display_name}")
|
||||
self.game_cache[game_data.twitch_id] = game_obj
|
||||
self._download_game_box_art(game_obj, game_obj.box_art)
|
||||
return game_obj
|
||||
|
||||
def _download_game_box_art(self, game_obj: Game, box_art_url: str | None) -> None:
    """Download and cache Twitch box art locally when possible.

    Nothing is downloaded when no URL is given, when the URL is not a Twitch
    box art URL, or when the game already has a local box art file. Download
    errors are reported through ``tqdm.write`` and otherwise ignored, so a
    failed image download does not raise out of this method.

    Args:
        game_obj: The game whose ``box_art_file`` field should be populated.
        box_art_url: The remote box art URL, or None/empty when unknown.
    """
    if not box_art_url or not is_twitch_box_art_url(box_art_url):
        return
    # Keep an already cached file; this method never re-downloads.
    if game_obj.box_art_file and getattr(game_obj.box_art_file, "name", ""):
        return

    normalized_url: str = normalize_twitch_box_art_url(box_art_url)
    # Reuse the remote file extension, defaulting to .jpg when the path has none.
    suffix: str = Path(urlparse(normalized_url).path).suffix or ".jpg"
    file_name: str = f"{game_obj.twitch_id}{suffix}"

    try:
        # httpx does not follow redirects by default; follow them here so this
        # behaves like the download_box_art management command.
        response = httpx.get(normalized_url, timeout=20, follow_redirects=True)
        response.raise_for_status()
    except httpx.HTTPError as exc:
        tqdm.write(
            f"{Fore.YELLOW}!{Style.RESET_ALL} Failed to download box art for {game_obj.twitch_id}: {exc}",
        )
        return

    # Same check as the download_box_art command: a final URL ending in the
    # 404 placeholder path is not real box art, so do not cache it.
    if response.url.path.endswith("/ttv-static/404_boxart.jpg"):
        return

    game_obj.box_art_file.save(file_name, ContentFile(response.content), save=True)
|
||||
|
||||
def _get_or_create_channel(self, channel_info: ChannelInfoSchema) -> Channel:
|
||||
"""Get or create a channel from cache or database.
|
||||
|
||||
|
|
|
|||
101
twitch/management/commands/download_box_art.py
Normal file
101
twitch/management/commands/download_box_art.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from urllib.parse import ParseResult
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
from django.conf import settings
|
||||
from django.core.files.base import ContentFile
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import CommandParser
|
||||
|
||||
from twitch.models import Game
|
||||
from twitch.utils import is_twitch_box_art_url
|
||||
from twitch.utils import normalize_twitch_box_art_url
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.db.models import QuerySet
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Download and cache Twitch game box art locally."""

    help = "Download and cache Twitch game box art locally."

    def add_arguments(self, parser: CommandParser) -> None:
        """Register command arguments."""
        parser.add_argument(
            "--limit",
            type=int,
            default=None,
            help="Limit the number of games to process.",
        )
        parser.add_argument(
            "--force",
            action="store_true",
            help="Re-download even if a local box art file already exists.",
        )

    def handle(self, *_args: object, **options: object) -> None:
        """Download Twitch box art images for all games.

        Games are processed in ``twitch_id`` order. A game is skipped when it
        has no box art URL, when the URL is not a Twitch box art URL, when a
        local file already exists (unless ``--force`` is given), or when the
        download resolves to Twitch's 404 placeholder image. A summary line
        with the counters is written to stdout at the end.

        Args:
            *_args: Unused positional arguments.
            **options: Parsed command options; ``limit`` and ``force`` are read.
        """
        limit_value: object | None = options.get("limit")
        limit: int | None = limit_value if isinstance(limit_value, int) else None
        force: bool = bool(options.get("force"))

        queryset: QuerySet[Game] = Game.objects.all().order_by("twitch_id")
        if limit:
            queryset = queryset[:limit]

        total: int = queryset.count()
        downloaded: int = 0
        skipped: int = 0
        failed: int = 0
        placeholders_404: int = 0

        # One shared client so connections are reused across all downloads.
        with httpx.Client(timeout=20, follow_redirects=True) as client:
            for game in queryset:
                if not game.box_art:
                    skipped += 1
                    continue
                if not is_twitch_box_art_url(game.box_art):
                    skipped += 1
                    continue
                if game.box_art_file and getattr(game.box_art_file, "name", "") and not force:
                    skipped += 1
                    continue

                normalized_url: str = normalize_twitch_box_art_url(game.box_art)
                parsed_url: ParseResult = urlparse(normalized_url)
                # Reuse the remote file extension, defaulting to .jpg.
                suffix: str = Path(parsed_url.path).suffix or ".jpg"
                file_name: str = f"{game.twitch_id}{suffix}"

                try:
                    response: httpx.Response = client.get(normalized_url)
                    response.raise_for_status()
                except httpx.HTTPError as exc:
                    failed += 1
                    self.stdout.write(
                        self.style.WARNING(
                            f"Failed to download box art for {game.twitch_id}: {exc}",
                        ),
                    )
                    continue

                # After redirects, a final URL ending in the placeholder path
                # is not real box art. It counts as a placeholder and as skipped.
                if response.url.path.endswith("/ttv-static/404_boxart.jpg"):
                    placeholders_404 += 1
                    skipped += 1
                    continue

                # With --force a file may already exist. Remove it only once the
                # new image has been fetched; otherwise the storage backend
                # would pick a new, suffixed name and leave the old file behind.
                if force and game.box_art_file:
                    game.box_art_file.delete(save=False)

                game.box_art_file.save(file_name, ContentFile(response.content), save=True)
                downloaded += 1

        self.stdout.write(
            self.style.SUCCESS(
                f"Processed {total} games. Downloaded: {downloaded}, skipped: {skipped}, "
                f"404 placeholders: {placeholders_404}, failed: {failed}.",
            ),
        )
        # NOTE(review): assumes Game.box_art_file uploads under games/box_art;
        # confirm against the model's upload_to setting.
        box_art_dir: Path = Path(settings.MEDIA_ROOT) / "games" / "box_art"
        self.stdout.write(self.style.SUCCESS(f"Saved box art to: {box_art_dir}"))
|
||||
|
|
@ -11,6 +11,8 @@ from django.utils import timezone
|
|||
from django.utils.html import format_html
|
||||
from django.utils.safestring import SafeText
|
||||
|
||||
from twitch.utils import normalize_twitch_box_art_url
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
|
||||
|
|
@ -187,7 +189,7 @@ class Game(auto_prefetch.Model):
|
|||
return self.box_art_file.url
|
||||
except (AttributeError, OSError, ValueError) as exc:
|
||||
logger.debug("Failed to resolve Game.box_art_file url: %s", exc)
|
||||
return self.box_art or ""
|
||||
return normalize_twitch_box_art_url(self.box_art or "")
|
||||
|
||||
|
||||
# MARK: TwitchGame
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ from pydantic import Field
|
|||
from pydantic import field_validator
|
||||
from pydantic import model_validator
|
||||
|
||||
from twitch.utils import normalize_twitch_box_art_url
|
||||
|
||||
|
||||
class OrganizationSchema(BaseModel):
|
||||
"""Schema for Twitch Organization objects."""
|
||||
|
|
@ -44,6 +46,24 @@ class GameSchema(BaseModel):
|
|||
"populate_by_name": True,
|
||||
}
|
||||
|
||||
@field_validator("box_art_url", mode="before")
@classmethod
def normalize_box_art_url(cls, v: str | None) -> str | None:
    """Normalize Twitch box art URLs to higher quality variants.

    Twitch's box art URLs often include size suffixes (e.g. -120x160) that point to lower quality images.
    This validator removes those suffixes to get the original higher quality image.

    Args:
        v: The raw box_art_url value (str or None).

    Returns:
        The normalized box_art_url string, or the input unchanged when it is
        empty, None, or not a string.
    """
    # A "before" validator sees the raw input, which is not guaranteed to be a
    # string. Return anything else untouched so the field's own validation
    # handles it, rather than failing inside URL parsing.
    if isinstance(v, str) and v:
        return normalize_twitch_box_art_url(v)
    return v
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def normalize_display_name(cls, data: dict | object) -> dict | object:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from __future__ import annotations
|
|||
|
||||
from twitch.schemas import DropBenefitSchema
|
||||
from twitch.schemas import DropCampaignSchema
|
||||
from twitch.schemas import GameSchema
|
||||
from twitch.schemas import GraphQLResponse
|
||||
from twitch.schemas import TimeBasedDropSchema
|
||||
|
||||
|
|
@ -118,6 +119,20 @@ def test_inventory_operation_validation() -> None:
|
|||
assert first_drop.benefit_edges[0].benefit.name == "Test Benefit"
|
||||
|
||||
|
||||
def test_game_schema_normalizes_twitch_box_art_url() -> None:
    """Ensure Twitch box art URLs are normalized for higher quality."""
    # Arrange: a payload whose box art URL carries a -120x160 size suffix.
    payload: dict[str, str] = {
        "id": "65654",
        "displayName": "Test Game",
        "boxArtURL": "https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB-120x160.jpg",
        "__typename": "Game",
    }
    expected_url: str = "https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB.jpg"

    # Act
    parsed_game: GameSchema = GameSchema.model_validate(payload)

    # Assert: the size suffix is gone and the rest of the URL is unchanged.
    assert parsed_game.box_art_url == expected_url
|
||||
|
||||
|
||||
def test_viewer_drops_dashboard_operation_still_works() -> None:
|
||||
"""Test that the original ViewerDropsDashboard format still validates.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from typing import TYPE_CHECKING
|
||||
from urllib.parse import ParseResult
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlunparse
|
||||
|
||||
import dateparser
|
||||
from django.utils import timezone
|
||||
|
|
@ -10,6 +14,44 @@ if TYPE_CHECKING:
|
|||
from datetime import datetime
|
||||
|
||||
|
||||
# Host and path prefix that identify a Twitch box art URL.
TWITCH_BOX_ART_HOST = "static-cdn.jtvnw.net"
TWITCH_BOX_ART_PATH_PREFIX = "/ttv-boxart/"
# Matches a "-<width>x<height>" size suffix (digits or the literal
# "{width}"/"{height}" template tokens) directly before the file extension.
TWITCH_BOX_ART_SIZE_PATTERN: re.Pattern[str] = re.compile(r"-(\{width\}|\d+)x(\{height\}|\d+)(?=\.[A-Za-z0-9]+$)")


def is_twitch_box_art_url(url: str) -> bool:
    """Return True when the URL points at Twitch's box art CDN.

    The host comparison uses the parsed hostname, so it is case-insensitive
    and ignores an explicit port. Empty values and URLs that cannot be parsed
    are reported as False instead of raising.

    Args:
        url: The URL to inspect.

    Returns:
        True when the host is the Twitch box art host and the path starts with
        the box art prefix, otherwise False.
    """
    if not url:
        return False

    try:
        parsed: ParseResult = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. an unbalanced "[" in the
        # host); such a value is simply not a Twitch box art URL.
        return False

    # ParseResult.hostname is already lower-cased and excludes port/userinfo.
    return parsed.hostname == TWITCH_BOX_ART_HOST and parsed.path.startswith(TWITCH_BOX_ART_PATH_PREFIX)


def normalize_twitch_box_art_url(url: str) -> str:
    """Normalize Twitch box art URLs to remove size suffixes for higher quality.

    Example:
        https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB-120x160.jpg
        -> https://static-cdn.jtvnw.net/ttv-boxart/65654_IGDB.jpg

    Args:
        url: The Twitch box art URL to normalize.

    Returns:
        The normalized Twitch box art URL without size suffixes. Empty values,
        unparsable values and non-Twitch URLs are returned unchanged.
    """
    # Single source of truth for "is this a Twitch box art URL"; it also
    # covers empty and unparsable input.
    if not is_twitch_box_art_url(url):
        return url

    parsed: ParseResult = urlparse(url)
    # Only the path is rewritten; scheme, host, query and fragment are kept.
    normalized_path: str = TWITCH_BOX_ART_SIZE_PATTERN.sub("", parsed.path)
    return urlunparse(parsed._replace(path=normalized_path))
|
||||
|
||||
|
||||
@lru_cache(maxsize=40 * 40 * 1024)
|
||||
def parse_date(value: str) -> datetime | None:
|
||||
"""Parse a datetime string into a timezone-aware datetime using dateparser.
|
||||
|
|
|
|||
|
|
@ -1158,7 +1158,7 @@ class GameDetailView(DetailView):
|
|||
game_description: str = (
|
||||
f"Twitch drop campaigns for {game_name}. View active, upcoming, and completed drop rewards."
|
||||
)
|
||||
game_image: str | None = game.box_art
|
||||
game_image: str | None = game.box_art_best_url
|
||||
|
||||
game_schema: dict[str, Any] = {
|
||||
"@context": "https://schema.org",
|
||||
|
|
@ -1167,8 +1167,8 @@ class GameDetailView(DetailView):
|
|||
"description": game_description,
|
||||
"url": self.request.build_absolute_uri(reverse("twitch:game_detail", args=[game.twitch_id])),
|
||||
}
|
||||
if game.box_art:
|
||||
game_schema["image"] = game.box_art
|
||||
if game.box_art_best_url:
|
||||
game_schema["image"] = game.box_art_best_url
|
||||
if owners:
|
||||
game_schema["publisher"] = {
|
||||
"@type": "Organization",
|
||||
|
|
@ -2295,7 +2295,7 @@ def export_games_csv(request: HttpRequest) -> HttpResponse: # noqa: ARG001 # n
|
|||
game.name,
|
||||
game.display_name,
|
||||
game.slug,
|
||||
game.box_art,
|
||||
game.box_art_best_url,
|
||||
game.added_at.isoformat() if game.added_at else "",
|
||||
game.updated_at.isoformat() if game.updated_at else "",
|
||||
])
|
||||
|
|
@ -2321,7 +2321,7 @@ def export_games_json(request: HttpRequest) -> HttpResponse: # noqa: ARG001 #
|
|||
"name": game.name,
|
||||
"display_name": game.display_name,
|
||||
"slug": game.slug,
|
||||
"box_art_url": game.box_art,
|
||||
"box_art_url": game.box_art_best_url,
|
||||
"added_at": game.added_at.isoformat() if game.added_at else None,
|
||||
"updated_at": game.updated_at.isoformat() if game.updated_at else None,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue