Cache images instead of serving them from Twitch
This commit is contained in:
parent
d434eac74a
commit
b97118cffd
16 changed files with 340 additions and 30 deletions
3
twitch/utils/__init__.py
Normal file
3
twitch/utils/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from __future__ import annotations
|
||||
|
||||
# Utility package for twitch app
|
||||
97
twitch/utils/images.py
Normal file
97
twitch/utils/images.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import mimetypes
|
||||
import re
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
# Module-level logger; download and write failures in this module are reported at DEBUG level.
logger: logging.Logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _sanitize_filename(name: str) -> str:
|
||||
"""Return a filesystem-safe filename."""
|
||||
name = re.sub(r"[^A-Za-z0-9._-]", "_", name)
|
||||
return name[:150] or "file"
|
||||
|
||||
|
||||
def _guess_extension(url: str, content_type: str | None) -> str:
|
||||
"""Guess a file extension from URL or content-type.
|
||||
|
||||
Args:
|
||||
url: Source URL.
|
||||
content_type: Optional content type from HTTP response.
|
||||
|
||||
Returns:
|
||||
File extension including dot, like ".png".
|
||||
"""
|
||||
parsed = urlparse(url)
|
||||
ext = Path(parsed.path).suffix.lower()
|
||||
if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp"}:
|
||||
return ext
|
||||
if content_type:
|
||||
guessed = mimetypes.guess_extension(content_type.split(";")[0].strip())
|
||||
if guessed:
|
||||
return guessed
|
||||
return ".bin"
|
||||
|
||||
|
||||
def cache_remote_image(url: str, subdir: str, *, timeout: float = 10.0) -> str | None:
|
||||
"""Download a remote image and save it under MEDIA_ROOT, returning storage path.
|
||||
|
||||
The file name is the SHA256 of the content to de-duplicate downloads.
|
||||
|
||||
Args:
|
||||
url: Remote image URL.
|
||||
subdir: Sub-directory under MEDIA_ROOT to store the file.
|
||||
timeout: Network timeout in seconds.
|
||||
|
||||
Returns:
|
||||
Relative storage path (under MEDIA_ROOT) suitable for assigning to FileField.name,
|
||||
or None if the operation failed.
|
||||
"""
|
||||
url = (url or "").strip()
|
||||
if not url or not url.startswith(("http://", "https://")):
|
||||
return None
|
||||
|
||||
try:
|
||||
# Enforce allowed schemes at runtime too
|
||||
parsed = urlparse(url)
|
||||
if parsed.scheme not in {"http", "https"}:
|
||||
return None
|
||||
req = Request(url, headers={"User-Agent": "TTVDrops/1.0"}) # noqa: S310
|
||||
# nosec: B310 - urlopen allowed because scheme is validated (http/https only)
|
||||
with urlopen(req, timeout=timeout) as resp: # noqa: S310
|
||||
content: bytes = resp.read()
|
||||
content_type = resp.headers.get("Content-Type")
|
||||
except OSError as exc:
|
||||
logger.debug("Failed to download image %s: %s", url, exc)
|
||||
return None
|
||||
|
||||
if not content:
|
||||
return None
|
||||
|
||||
sha = hashlib.sha256(content).hexdigest()
|
||||
ext = _guess_extension(url, content_type)
|
||||
# Shard into two-level directories by hash for scalability
|
||||
shard1, shard2 = sha[:2], sha[2:4]
|
||||
media_subdir = Path(subdir) / shard1 / shard2
|
||||
target_dir: Path = Path(settings.MEDIA_ROOT) / media_subdir
|
||||
target_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
filename = f"{sha}{ext}"
|
||||
storage_rel_path = str(media_subdir / _sanitize_filename(filename)).replace("\\", "/")
|
||||
storage_abs_path = Path(settings.MEDIA_ROOT) / storage_rel_path
|
||||
|
||||
if not storage_abs_path.exists():
|
||||
try:
|
||||
storage_abs_path.write_bytes(content)
|
||||
except OSError as exc:
|
||||
logger.debug("Failed to write image %s: %s", storage_abs_path, exc)
|
||||
return None
|
||||
|
||||
return storage_rel_path
|
||||
Loading…
Add table
Add a link
Reference in a new issue