Cache images locally instead of serving them from Twitch

This commit is contained in:
Joakim Hellsén 2025-09-13 06:37:35 +02:00
commit b97118cffd
16 changed files with 340 additions and 30 deletions

View file

@ -0,0 +1,69 @@
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from django.core.management.base import BaseCommand
from twitch.models import DropBenefit, DropCampaign, Game
from twitch.utils.images import cache_remote_image
if TYPE_CHECKING: # pragma: no cover - typing only
from argparse import ArgumentParser
from django.db.models import QuerySet
logger: logging.Logger = logging.getLogger(__name__)
class Command(BaseCommand):
    """Backfill local image files for existing rows.

    For every Game, DropCampaign and DropBenefit that has a remote image URL
    but no locally cached file, download the image with ``cache_remote_image``
    and store the returned relative path on the row's file field.
    """

    help = "Download and cache remote images to MEDIA for Games, Campaigns, and Benefits"

    def add_arguments(self, parser: ArgumentParser) -> None:  # type: ignore[override]
        """Add CLI arguments for the management command."""
        parser.add_argument(
            "--limit",
            type=int,
            default=0,
            help="Limit number of objects per model to process (0 = no limit)",
        )

    def handle(self, **options: object) -> None:
        """Execute the backfill process using provided options.

        Args:
            **options: Parsed command options; only ``limit`` is read. A
                missing, ``None`` or non-positive limit means "no limit".
        """
        # Imported locally so this block does not depend on the module's import list.
        from django.db.models import Q

        # ``or 0`` also covers an explicit ``limit=None`` (e.g. via call_command).
        limit: int = int(options.get("limit") or 0)  # type: ignore[arg-type]

        # (model, file field, remote URL field, storage sub-directory, log label)
        targets = (
            (Game, "box_art_file", "box_art", "games/box_art", "game box art"),
            (DropCampaign, "image_file", "image_url", "campaigns/images", "campaign image"),
            (DropBenefit, "image_file", "image_asset_url", "benefits/images", "benefit image"),
        )

        processed = 0
        for model, file_field, url_field, subdir, label in targets:
            # Django persists an unset FileField as "" rather than NULL, so rows
            # saved after the migration must be matched on the empty string too.
            missing_file = Q(**{f"{file_field}__isnull": True}) | Q(**{file_field: ""})
            queryset: QuerySet = model.objects.filter(missing_file).exclude(**{url_field: ""})
            if limit > 0:
                queryset = queryset[:limit]

            for obj in queryset:
                rel = cache_remote_image(getattr(obj, url_field), subdir)
                if not rel:
                    # Download or write failed; leave the row for a later run.
                    continue
                getattr(obj, file_field).name = rel
                obj.save(update_fields=[file_field])
                processed += 1
                self.stdout.write(self.style.SUCCESS(f"Processed {label}: {obj.id}"))

        self.stdout.write(self.style.SUCCESS(f"Backfill complete. Updated {processed} images."))

View file

@ -13,6 +13,7 @@ from django.db import transaction
from django.utils import timezone
from twitch.models import Channel, DropBenefit, DropBenefitEdge, DropCampaign, Game, Organization, TimeBasedDrop
from twitch.utils.images import cache_remote_image
if TYPE_CHECKING:
from datetime import datetime
@ -472,6 +473,13 @@ class Command(BaseCommand):
defaults=benefit_defaults,
)
# Cache benefit image if available and not already cached
if (not benefit.image_file) and benefit.image_asset_url:
rel_path: str | None = cache_remote_image(benefit.image_asset_url, "benefits/images")
if rel_path:
benefit.image_file.name = rel_path
benefit.save(update_fields=["image_file"])
DropBenefitEdge.objects.update_or_create(
drop=time_based_drop,
benefit=benefit,
@ -590,6 +598,13 @@ class Command(BaseCommand):
if created:
self.stdout.write(self.style.SUCCESS(f"Created new drop campaign: {drop_campaign.name} (ID: {drop_campaign.id})"))
# Cache campaign image if available and not already cached
if (not drop_campaign.image_file) and drop_campaign.image_url:
rel_path: str | None = cache_remote_image(drop_campaign.image_url, "campaigns/images")
if rel_path:
drop_campaign.image_file.name = rel_path
drop_campaign.save(update_fields=["image_file"]) # type: ignore[list-item]
return drop_campaign
def owner_update_or_create(self, campaign_data: dict[str, Any]) -> Organization | None:
@ -648,4 +663,11 @@ class Command(BaseCommand):
)
if created:
self.stdout.write(self.style.SUCCESS(f"Created new game: {game.display_name} (ID: {game.id})"))
# Cache game box art if available and not already cached
if (not game.box_art_file) and game.box_art:
rel_path: str | None = cache_remote_image(game.box_art, "games/box_art")
if rel_path:
game.box_art_file.name = rel_path
game.save(update_fields=["box_art_file"])
return game

View file

@ -0,0 +1,29 @@
from __future__ import annotations
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add local image FileFields to models for caching Twitch images.

    Adds one nullable, optional FileField to each of Game, DropCampaign and
    DropBenefit.
    """

    dependencies = [("twitch", "0015_alter_dropbenefitedge_benefit_and_more")]

    operations = [
        # Game: cached box art.
        migrations.AddField(
            model_name="game",
            name="box_art_file",
            field=models.FileField(
                upload_to="games/box_art/",
                null=True,
                blank=True,
            ),
        ),
        # DropCampaign: cached campaign image.
        migrations.AddField(
            model_name="dropcampaign",
            name="image_file",
            field=models.FileField(
                upload_to="campaigns/images/",
                null=True,
                blank=True,
            ),
        ),
        # DropBenefit: cached benefit image.
        migrations.AddField(
            model_name="dropbenefit",
            name="image_file",
            field=models.FileField(
                upload_to="benefits/images/",
                null=True,
                blank=True,
            ),
        ),
    ]

View file

@ -0,0 +1,28 @@
# Generated by Django 5.2.6 on 2025-09-13 00:49
from django.db import migrations, models
class Migration(migrations.Migration):
    """Attach help text to the three locally cached image FileFields."""

    dependencies = [("twitch", "0016_add_local_image_fields")]

    operations = [
        # DropBenefit.image_file
        migrations.AlterField(
            model_name="dropbenefit",
            name="image_file",
            field=models.FileField(
                upload_to="benefits/images/",
                null=True,
                blank=True,
                help_text="Locally cached benefit image served from this site.",
            ),
        ),
        # DropCampaign.image_file
        migrations.AlterField(
            model_name="dropcampaign",
            name="image_file",
            field=models.FileField(
                upload_to="campaigns/images/",
                null=True,
                blank=True,
                help_text="Locally cached campaign image served from this site.",
            ),
        ),
        # Game.box_art_file
        migrations.AlterField(
            model_name="game",
            name="box_art_file",
            field=models.FileField(
                upload_to="games/box_art/",
                null=True,
                blank=True,
                help_text="Locally cached box art image served from this site.",
            ),
        ),
    ]

View file

@ -95,6 +95,13 @@ class Game(auto_prefetch.Model):
default="",
verbose_name="Box art URL",
)
# Locally cached image file for the game's box art.
# The import and backfill management commands set ``box_art_file.name`` to the
# MEDIA-relative path returned by ``cache_remote_image``.
# NOTE(review): Django writes an unset FileField as "" rather than NULL, so
# "no cached file" may appear as either value in the database - confirm that
# queries on this column account for both.
box_art_file = models.FileField(
    upload_to="games/box_art/",
    blank=True,
    null=True,
    help_text="Locally cached box art image served from this site.",
)
# PostgreSQL full-text search field
search_vector = SearchVectorField(null=True, blank=True)
@ -162,6 +169,22 @@ class Game(auto_prefetch.Model):
)
return urlunsplit((parts.scheme, parts.netloc, path, "", ""))
@property
def box_art_best_url(self) -> str:
    """Return the URL that should be used to display the game's box art.

    The locally cached file (served from MEDIA) wins when it exists and
    yields a non-empty URL; otherwise the value of ``box_art_base_url`` is
    returned.
    """
    local_url = None
    try:
        cached = self.box_art_file
        if cached:
            local_url = getattr(cached, "url", None)
    except (AttributeError, OSError, ValueError) as exc:  # storage might not be configured in some contexts
        logger.debug("Failed to resolve Game.box_art_file url: %s", exc)
    if local_url:
        return local_url
    return self.box_art_base_url
@property
def get_game_name(self) -> str:
"""Return the best available name for the game."""
@ -260,6 +283,13 @@ class DropCampaign(auto_prefetch.Model):
default="",
help_text="URL to an image representing the campaign.",
)
# Locally cached campaign image.
# The import and backfill management commands set ``image_file.name`` to the
# MEDIA-relative path returned by ``cache_remote_image``.
# NOTE(review): Django writes an unset FileField as "" rather than NULL, so
# "no cached file" may appear as either value in the database - confirm that
# queries on this column account for both.
image_file = models.FileField(
    upload_to="campaigns/images/",
    blank=True,
    null=True,
    help_text="Locally cached campaign image served from this site.",
)
start_at = models.DateTimeField(
db_index=True,
null=True,
@ -368,6 +398,16 @@ class DropCampaign(auto_prefetch.Model):
return self.name
@property
def image_best_url(self) -> str:
    """Return the URL to display for the campaign image, preferring the local copy.

    Falls back to ``image_url`` (or an empty string when that is unset) if
    no cached file is available or its URL cannot be resolved.
    """
    local_url = None
    try:
        cached = self.image_file
        if cached:
            local_url = getattr(cached, "url", None)
    except (AttributeError, OSError, ValueError) as exc:
        logger.debug("Failed to resolve DropCampaign.image_file url: %s", exc)
    if local_url:
        return local_url
    return self.image_url or ""
class DropBenefit(auto_prefetch.Model):
"""Represents a benefit that can be earned from a drop."""
@ -390,6 +430,13 @@ class DropBenefit(auto_prefetch.Model):
default="",
help_text="URL to the benefit's image asset.",
)
# Locally cached benefit image.
# The import and backfill management commands set ``image_file.name`` to the
# MEDIA-relative path returned by ``cache_remote_image``.
# NOTE(review): Django writes an unset FileField as "" rather than NULL, so
# "no cached file" may appear as either value in the database - confirm that
# queries on this column account for both.
image_file = models.FileField(
    upload_to="benefits/images/",
    blank=True,
    null=True,
    help_text="Locally cached benefit image served from this site.",
)
created_at = models.DateTimeField(
null=True,
db_index=True,
@ -443,6 +490,16 @@ class DropBenefit(auto_prefetch.Model):
"""Return a string representation of the drop benefit."""
return self.name
@property
def image_best_url(self) -> str:
    """Return the URL to display for the benefit image, preferring the local copy.

    Falls back to ``image_asset_url`` (or an empty string when that is
    unset) if no cached file is available or its URL cannot be resolved.
    """
    local_url = None
    try:
        cached = self.image_file
        if cached:
            local_url = getattr(cached, "url", None)
    except (AttributeError, OSError, ValueError) as exc:
        logger.debug("Failed to resolve DropBenefit.image_file url: %s", exc)
    if local_url:
        return local_url
    return self.image_asset_url or ""
class TimeBasedDrop(auto_prefetch.Model):
"""Represents a time-based drop in a drop campaign."""

3
twitch/utils/__init__.py Normal file
View file

@ -0,0 +1,3 @@
from __future__ import annotations
# Utility package for twitch app

97
twitch/utils/images.py Normal file
View file

@ -0,0 +1,97 @@
from __future__ import annotations
import hashlib
import logging
import mimetypes
import re
from pathlib import Path
from urllib.parse import urlparse
from urllib.request import Request, urlopen
from django.conf import settings
logger: logging.Logger = logging.getLogger(__name__)
def _sanitize_filename(name: str) -> str:
"""Return a filesystem-safe filename."""
name = re.sub(r"[^A-Za-z0-9._-]", "_", name)
return name[:150] or "file"
def _guess_extension(url: str, content_type: str | None) -> str:
"""Guess a file extension from URL or content-type.
Args:
url: Source URL.
content_type: Optional content type from HTTP response.
Returns:
File extension including dot, like ".png".
"""
parsed = urlparse(url)
ext = Path(parsed.path).suffix.lower()
if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp"}:
return ext
if content_type:
guessed = mimetypes.guess_extension(content_type.split(";")[0].strip())
if guessed:
return guessed
return ".bin"
def cache_remote_image(url: str, subdir: str, *, timeout: float = 10.0) -> str | None:
"""Download a remote image and save it under MEDIA_ROOT, returning storage path.
The file name is the SHA256 of the content to de-duplicate downloads.
Args:
url: Remote image URL.
subdir: Sub-directory under MEDIA_ROOT to store the file.
timeout: Network timeout in seconds.
Returns:
Relative storage path (under MEDIA_ROOT) suitable for assigning to FileField.name,
or None if the operation failed.
"""
url = (url or "").strip()
if not url or not url.startswith(("http://", "https://")):
return None
try:
# Enforce allowed schemes at runtime too
parsed = urlparse(url)
if parsed.scheme not in {"http", "https"}:
return None
req = Request(url, headers={"User-Agent": "TTVDrops/1.0"}) # noqa: S310
# nosec: B310 - urlopen allowed because scheme is validated (http/https only)
with urlopen(req, timeout=timeout) as resp: # noqa: S310
content: bytes = resp.read()
content_type = resp.headers.get("Content-Type")
except OSError as exc:
logger.debug("Failed to download image %s: %s", url, exc)
return None
if not content:
return None
sha = hashlib.sha256(content).hexdigest()
ext = _guess_extension(url, content_type)
# Shard into two-level directories by hash for scalability
shard1, shard2 = sha[:2], sha[2:4]
media_subdir = Path(subdir) / shard1 / shard2
target_dir: Path = Path(settings.MEDIA_ROOT) / media_subdir
target_dir.mkdir(parents=True, exist_ok=True)
filename = f"{sha}{ext}"
storage_rel_path = str(media_subdir / _sanitize_filename(filename)).replace("\\", "/")
storage_abs_path = Path(settings.MEDIA_ROOT) / storage_rel_path
if not storage_abs_path.exists():
try:
storage_abs_path.write_bytes(content)
except OSError as exc:
logger.debug("Failed to write image %s: %s", storage_abs_path, exc)
return None
return storage_rel_path

View file

@ -550,7 +550,7 @@ def dashboard(request: HttpRequest) -> HttpResponse:
if game_id not in campaigns_by_org_game[org_id]["games"]:
campaigns_by_org_game[org_id]["games"][game_id] = {
"name": game_name,
"box_art": campaign.game.box_art_base_url,
"box_art": campaign.game.box_art_best_url,
"campaigns": [],
}