WIP: Download images

This commit is contained in:
Joakim Hellsén 2025-09-16 20:31:23 +02:00
commit 6751ae724d
5 changed files with 299 additions and 0 deletions

View file

@ -11,6 +11,7 @@
"docstrings",
"dotenv",
"Hellsén",
"IGDB",
"isort",
"Joakim",
"kwargs",

View file

@ -19,6 +19,7 @@ dependencies = [
"psycopg[binary]>=3.2.3",
"pygments>=2.19.2",
"django-auto-prefetch>=1.13.0",
"httpx>=0.28.1",
]
[dependency-groups]

View file

@ -0,0 +1,201 @@
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
import httpx
from django.core.management.base import BaseCommand
from django.db.models.query import QuerySet
from twitch.models import Game
if TYPE_CHECKING:
from collections.abc import Generator
from django.db.models import QuerySet
# Used by get_twitch_data to reject oversized requests before any HTTP call is made;
# the Twitch "Get Games" endpoint answers 400 when more than 100 IDs/names are combined.
MAX_AMOUNT_GAMES_PER_REQUEST: int = 100
"""Maximum number of games that can be requested from Twitch API in a single request."""
class TwitchTooManyGamesError(Exception):
    """Signal that one Twitch API request asked for more game IDs than is permitted."""

    def __init__(self, max_allowed: int, actual: int) -> None:
        """Record both counts and build the human-readable error message.

        Args:
            max_allowed (int): Upper bound on game IDs accepted in a single request.
            actual (int): Number of game IDs the caller tried to request.
        """
        message = f"Requested {actual} game IDs, but the maximum allowed is {max_allowed}."
        super().__init__(message)
        # Kept as attributes so callers can inspect the numbers without parsing the message.
        self.actual: int = actual
        self.max_allowed: int = max_allowed
@dataclass
class AuthResponse:
    """Typed view of the JSON body returned by the Twitch OAuth token endpoint.

    Example payload:

        {
            "access_token": "access12345token",
            "expires_in": 5587808,
            "token_type": "bearer"
        }

    Once ``expires_in`` seconds have elapsed the ``access_token`` is no longer valid
    and a new one must be requested.
    """

    access_token: str
    """The OAuth access token sent as the Bearer credential on API requests."""

    expires_in: int
    """Lifetime of ``access_token`` in seconds, counted from when it was issued."""

    token_type: str
    """Kind of token that was issued, e.g. "bearer"."""
def download_images(twitch_client_id: str, client: httpx.Client, auth: AuthResponse, game_ids: list[int]) -> httpx.Response:
    """Fetch game data (including image references) from the IGDB API.

    Sends a single POST request to the IGDB ``/v4/games`` endpoint asking for every field
    of the games whose IDs are listed in ``game_ids``. The request is authorized with the
    Twitch client ID and the Bearer access token.

    The query carries an explicit ``limit`` clause: without one IGDB only returns its
    default page of 10 rows, no matter how many IDs are listed in the ``where`` clause.
    IGDB caps ``limit`` at 500, so callers should pass at most 500 IDs per call.

    Args:
        twitch_client_id (str): The Twitch client ID used for authentication with IGDB API.
        client (httpx.Client): The HTTP client used to send the request.
        auth (AuthResponse): The authentication response object containing the access token.
        game_ids (list[int]): IGDB game IDs to fetch data for. Expected to be non-empty.

    Returns:
        httpx.Response: The HTTP response from the IGDB API. The status code is not checked
            here; callers decide how to handle non-success responses.
    """
    igdb_max_limit = 500  # Largest page size IGDB accepts in a ``limit`` clause.
    url = "https://api.igdb.com/v4/games"

    id_list: str = ",".join(str(gid) for gid in game_ids)
    # Ask for one row per requested ID instead of relying on IGDB's default of 10.
    row_limit: int = min(len(game_ids), igdb_max_limit)
    body: str = f"fields *; where id = ({id_list}); limit {row_limit};"

    return client.post(
        url=url,
        headers={"Client-ID": twitch_client_id, "Authorization": f"Bearer {auth.access_token}"},
        content=body,
        timeout=30.0,
    )
def batched_queryset(qs: QuerySet, batch_size: int = 500) -> Generator[list[Any], Any]:
    """Yield the ``igdb_id`` values of a QuerySet in fixed-size chunks.

    Rather than materialising every row at once, the QuerySet is sliced window by window
    and each window is turned into a plain list before being handed to the caller.
    Iteration stops at the first empty window.

    Args:
        qs (QuerySet): The Django QuerySet to read ``igdb_id`` values from.
        batch_size (int, optional): Maximum number of values per yielded list. Defaults to 500.

    Yields:
        list[Any]: The next chunk of ``igdb_id`` values, never empty.

    Example:
        >>> games = Game.objects.all()
        >>> for batch in batched_queryset(games):
        ...     process_game_batch(batch)
    """
    igdb_ids = qs.values_list("igdb_id", flat=True)
    offset = 0
    # Slicing is re-evaluated for every window; an empty window ends the loop.
    while chunk := list(igdb_ids[offset : offset + batch_size]):
        yield chunk
        offset += batch_size
def get_twitch_data(twitch_client_id: str, game_ids: list[int], access_token: str | None = None) -> httpx.Response:
    """Fetches game data from the Twitch API.

    Args:
        twitch_client_id (str): The Twitch client ID used for authentication.
        game_ids (list[int]): A list of game IDs to fetch data for.
        access_token (str | None, optional): App or user access token. When given it is sent
            as a Bearer ``Authorization`` header, which the Get Games endpoint requires;
            when omitted only the ``Client-ID`` header is sent (the previous behaviour).

    Raises:
        TwitchTooManyGamesError: If the number of game IDs exceeds the maximum allowed per request.
        ValueError: If the request parameters are invalid.
        PermissionError: If the authentication fails due to invalid credentials.

    References:
        - Twitch API Documentation: https://dev.twitch.tv/docs/api/reference#get-games

    Returns:
        httpx.Response: The HTTP response from the Twitch API containing game data. Responses
            with a status other than 200, 400 or 401 are returned unchanged for the caller
            to inspect.
    """
    # Fail fast: no point in sending a request the API is documented to reject.
    if len(game_ids) > MAX_AMOUNT_GAMES_PER_REQUEST:
        raise TwitchTooManyGamesError(MAX_AMOUNT_GAMES_PER_REQUEST, len(game_ids))

    headers: dict[str, str] = {"Client-ID": twitch_client_id}
    if access_token:
        headers["Authorization"] = f"Bearer {access_token}"

    with httpx.Client() as client:
        response: httpx.Response = client.get(
            url="https://api.twitch.tv/helix/games",
            headers=headers,
            # Repeated "id" query parameters: one per requested game.
            params=[("id", str(gid)) for gid in game_ids],
            timeout=30.0,
        )

    if response.status_code == httpx.codes.OK:
        # Successfully retrieved the specified games.
        return response

    if response.status_code == httpx.codes.BAD_REQUEST:
        # The request must specify the id or name or igdb_id query parameter.
        # The too-many-IDs case cannot reach this point; it is rejected before the request is sent.
        msg: str = f"Bad Request: Check that the request parameters are correct. Response: {response.text}"
        raise ValueError(msg)

    if response.status_code == httpx.codes.UNAUTHORIZED:
        # The Authorization header is required and must specify an app access token or user access token.
        # The access token is not valid.
        # The ID in the Client-Id header must match the client ID in the access token.
        msg = "Unauthorized: Check that the access token and client ID are correct."
        raise PermissionError(msg)

    return response
class Command(BaseCommand):
    """Populate database with data from Twitch and IGDB APIs."""

    help: str = __doc__ or ""

    def handle(self, **options) -> None:  # noqa: ARG002
        """Execute the image download process.

        Reads ``TWITCH_CLIENT_ID`` and ``TWITCH_CLIENT_SECRET`` from the environment, requests
        an app access token from Twitch with the client-credentials grant, then queries IGDB
        for every stored game in batches of 500 IDs.

        Problems (missing credentials, failed authentication, failed IGDB requests) are
        reported on stderr; the command does not raise for them.
        """
        twitch_client_id: str | None = os.getenv("TWITCH_CLIENT_ID")
        twitch_client_secret: str | None = os.getenv("TWITCH_CLIENT_SECRET")
        if not twitch_client_id or not twitch_client_secret:
            self.stderr.write(self.style.ERROR("TWITCH_CLIENT_ID and TWITCH_CLIENT_SECRET must be set in environment"))
            return

        # A single client for the whole run so connections are reused across requests.
        with httpx.Client() as client:
            response: httpx.Response = client.post(
                url="https://id.twitch.tv/oauth2/token",
                params={
                    "client_id": twitch_client_id,
                    "client_secret": twitch_client_secret,
                    "grant_type": "client_credentials",
                },
            )
            if response.status_code != httpx.codes.OK:
                self.stderr.write(
                    self.style.ERROR(f"Twitch authentication failed with status {response.status_code}: {response.text}"),
                )
                return

            try:
                payload: dict[str, Any] = response.json()
                # Pick the fields explicitly so extra keys in the payload cannot break construction.
                auth: AuthResponse = AuthResponse(
                    access_token=payload["access_token"],
                    expires_in=payload["expires_in"],
                    token_type=payload["token_type"],
                )
            except (KeyError, TypeError, ValueError) as exc:
                self.stderr.write(self.style.ERROR(f"Unexpected Twitch authentication response: {exc!r}"))
                return

            # Never echo the response body: it contains the access token.
            self.stdout.write(self.style.SUCCESS(f"Authenticated with Twitch (token expires in {auth.expires_in} seconds)"))

            # Get game_ids from the database
            qs: QuerySet[Game, Game] = Game.objects.all()
            for game_ids in batched_queryset(qs, batch_size=500):
                igdb_response: httpx.Response = download_images(
                    twitch_client_id=twitch_client_id,
                    client=client,
                    auth=auth,
                    game_ids=game_ids,
                )
                if igdb_response.status_code != httpx.codes.OK:
                    # Report the failed batch and carry on with the remaining ones.
                    self.stderr.write(
                        self.style.ERROR(f"IGDB request for {len(game_ids)} games failed with status {igdb_response.status_code}"),
                    )
                    continue
                self.stdout.write(self.style.SUCCESS(f"Fetched {len(game_ids)} games"))

View file

@ -206,6 +206,54 @@ class Game(auto_prefetch.Model):
return ""
# MARK: TwitchGame
class TwitchGameData(auto_prefetch.Model):
    """Represents game metadata returned from the Twitch API.

    This mirrors the public Twitch API fields for a game and is tied to the local `Game` model where possible.

    Fields:
        id: Twitch game id (primary key)
        game: Optional FK to the local Game object
        name: Display name of the game
        box_art_url: URL template for box art with {width}x{height} placeholder
        igdb_id: Optional IGDB id for the game
        added_at: Set once when the row is created
        updated_at: Refreshed on every save
    """

    id = models.CharField(max_length=255, primary_key=True, verbose_name="Twitch Game ID")
    game = auto_prefetch.ForeignKey(
        Game,
        # Keep the Twitch metadata row when the linked Game is deleted; the link is just cleared.
        on_delete=models.SET_NULL,
        related_name="twitch_game_data",
        null=True,
        blank=True,
        verbose_name="Game",
        help_text="Optional link to the local Game record for this Twitch game.",
    )
    # The index on this column is declared once, in Meta.indexes; adding db_index=True here
    # as well would create a second, identical index.
    name = models.CharField(max_length=255, blank=True, default="", verbose_name="Name")
    box_art_url = models.URLField(
        max_length=500,
        blank=True,
        default="",
        verbose_name="Box art URL",
        help_text="URL template with {width}x{height} placeholders for the box art image.",
    )
    igdb_id = models.CharField(max_length=255, blank=True, default="", verbose_name="IGDB ID")
    added_at = models.DateTimeField(auto_now_add=True, db_index=True, help_text="Record creation time.")
    updated_at = models.DateTimeField(auto_now=True, help_text="Record last update time.")

    class Meta(auto_prefetch.Model.Meta):
        ordering = ["name"]
        indexes: ClassVar[list] = [
            models.Index(fields=["name"]),
        ]

    def __str__(self) -> str:  # pragma: no cover - trivial
        """Return the game's name, falling back to the Twitch id when the name is empty."""
        return self.name or self.id
# MARK: Channel
class Channel(auto_prefetch.Model):
"""Represents a Twitch channel that can participate in drop campaigns."""

48
uv.lock generated
View file

@ -24,6 +24,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/3c/0464dcada90d5da0e71018c04a140ad6349558afb30b3051b4264cc5b965/asgiref-3.9.1-py3-none-any.whl", hash = "sha256:f3bba7092a48005b5f5bacd747d36ee4a5a61f4a269a6df590b43144355ebd2c", size = 23790, upload-time = "2025-07-08T09:07:41.548Z" },
]
[[package]]
name = "certifi"
version = "2025.8.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
]
[[package]]
name = "click"
version = "8.2.1"
@ -214,6 +223,43 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc", size = 40612, upload-time = "2024-04-08T09:04:17.414Z" },
]
[[package]]
name = "h11"
version = "0.16.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
]
[[package]]
name = "httpcore"
version = "1.0.9"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "h11" },
]
sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
]
[[package]]
name = "httpx"
version = "0.28.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "certifi" },
{ name = "httpcore" },
{ name = "idna" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
]
[[package]]
name = "idna"
version = "3.10"
@ -615,6 +661,7 @@ dependencies = [
{ name = "django-stubs", extra = ["compatible-mypy"] },
{ name = "django-watchfiles" },
{ name = "djlint" },
{ name = "httpx" },
{ name = "json-repair" },
{ name = "orjson" },
{ name = "platformdirs" },
@ -640,6 +687,7 @@ requires-dist = [
{ name = "django-stubs", extras = ["compatible-mypy"], specifier = ">=5.2.2" },
{ name = "django-watchfiles", specifier = ">=1.1.0" },
{ name = "djlint", specifier = ">=1.36.4" },
{ name = "httpx", specifier = ">=0.28.1" },
{ name = "json-repair", specifier = ">=0.50.0" },
{ name = "orjson", specifier = ">=3.11.1" },
{ name = "platformdirs", specifier = ">=4.3.8" },