ttvdrops/twitch/sitemaps.py
Joakim Hellsén 415dd12fd9
All checks were successful
Deploy to Server / deploy (push) Successful in 9s
Improve sitemaps
2026-02-27 06:02:30 +01:00

243 lines
9.2 KiB
Python

from __future__ import annotations
from datetime import UTC
from datetime import datetime
from typing import TYPE_CHECKING
from typing import TypedDict
from django.conf import settings
from django.contrib.sitemaps import Sitemap
from django.db.models import Max
from django.db.models import Prefetch
from django.db.models.query import QuerySet
from django.urls import reverse
from twitch.models import Channel
from twitch.models import ChatBadgeSet
from twitch.models import DropBenefit
from twitch.models import DropCampaign
from twitch.models import Game
from twitch.models import Organization
from twitch.models import RewardCampaign
from twitch.models import TimeBasedDrop
if TYPE_CHECKING:
from pathlib import Path
from django.db.models import QuerySet
class EmoteDict(TypedDict):
    """Pairing of an emote image with the drop campaign it was found through.

    TwitchSitemapGenerator collects these entries while working out the
    lastmod value of the emote gallery sitemap item.
    """

    # URL of the emote image, taken from the benefit's ``image_best_url``.
    image_url: str
    # Campaign whose ``updated_at`` is considered for the gallery's lastmod.
    campaign: DropCampaign
class TwitchSitemapGenerator(Sitemap):
    """Sitemap for static views on the Twitch site.

    Each item is a URL pattern name; ``lastmod`` is derived per page from the
    data that page is built from.
    """

    def items(self) -> list[str]:
        """Return list of URL pattern names to include in sitemap."""
        # names used in `twitch/urls.py`
        return [
            "twitch:dashboard",
            "twitch:campaign_list",
            "twitch:reward_campaign_list",
            "twitch:games_grid",
            "twitch:games_list",
            "twitch:org_list",
            "twitch:channel_list",
            "twitch:badge_list",
            "twitch:emote_gallery",
            "twitch:search",
            "twitch:dataset_backups",
            "twitch:docs_rss",
        ]

    def location(self, item: str) -> str:
        """Return URL for a given item (URL pattern name)."""
        return reverse(item)

    def lastmod(self, item: str) -> datetime | None:
        """Return latest modified time across models relevant to static views.

        Args:
            item: URL pattern name, as returned by ``items()``.

        Returns:
            The newest relevant timestamp for the page, or ``None`` for pages
            without a meaningful modification time (search, docs RSS, the
            channel list), for unknown items, and when there is no data to
            derive a timestamp from.
        """
        if item in {"twitch:search", "twitch:docs_rss"}:
            return None
        # The dashboard and the campaign list both track drop campaigns.
        if item in {"twitch:dashboard", "twitch:campaign_list"}:
            return DropCampaign.objects.aggregate(latest=Max("updated_at"))["latest"]
        if item == "twitch:reward_campaign_list":
            return RewardCampaign.objects.aggregate(latest=Max("updated_at"))["latest"]
        if item in {"twitch:games_grid", "twitch:games_list"}:
            return Game.objects.aggregate(latest=Max("updated_at"))["latest"]
        if item == "twitch:org_list":
            return Organization.objects.aggregate(latest=Max("updated_at"))["latest"]
        if item == "twitch:channel_list":
            # TODO(TheLovinator): This page is paginated, so we should not # noqa: TD003
            # return the latest updated_at across all channels, as that would
            # cause the entire sitemap to be re-crawled whenever any channel is updated
            # Instead, we should consider only returning the latest updated_at across
            # channels included in the first page of results, or implementing a more
            # sophisticated approach to ensure we don't trigger excessive re-crawling
            # while still keeping the sitemap reasonably up to date.
            return None
        if item == "twitch:badge_list":
            return ChatBadgeSet.objects.aggregate(latest=Max("updated_at"))["latest"]
        if item == "twitch:emote_gallery":
            return self._emote_gallery_lastmod()
        if item == "twitch:dataset_backups":
            return self._dataset_backups_lastmod()
        return None

    @staticmethod
    def _emote_gallery_lastmod() -> datetime | None:
        """Return the newest ``updated_at`` among campaigns that provide emotes."""
        # TODO(TheLovinator): Refactor this to avoid duplicating code from the emote gallery view. # noqa: TD003
        emote_benefits: QuerySet[DropBenefit, DropBenefit] = (
            DropBenefit.objects
            .filter(distribution_type="EMOTE")
            .select_related()
            .prefetch_related(
                Prefetch(
                    "drops",
                    queryset=TimeBasedDrop.objects.select_related("campaign"),
                    to_attr="_emote_drops",
                ),
            )
        )

        emotes: list[EmoteDict] = []
        for benefit in emote_benefits:
            # Find the first drop with a campaign for this benefit
            drop: TimeBasedDrop | None = next(
                (d for d in getattr(benefit, "_emote_drops", []) if d.campaign),
                None,
            )
            if drop is None:
                continue
            # The generator above only yields drops whose campaign is set,
            # so no second check on drop.campaign is needed here.
            emotes.append({
                "image_url": benefit.image_best_url,
                "campaign": drop.campaign,
            })

        if not emotes:
            # If there are no emotes, return None to avoid unnecessarily triggering re-crawls of the sitemap
            return None
        # Return the latest updated_at across all campaigns associated with emotes
        return max(emote["campaign"].updated_at for emote in emotes)

    @staticmethod
    def _dataset_backups_lastmod() -> datetime | None:
        """Return the newest mtime (UTC) among ``dataset_backup_*.zip`` files."""
        datasets_root: Path = settings.DATA_DIR / "datasets"

        # Stat each file exactly once; the timestamps are all that is needed.
        mtimes: list[float] = []
        for backup_file in datasets_root.glob("dataset_backup_*.zip"):
            try:
                mtimes.append(backup_file.stat().st_mtime)
            except FileNotFoundError:
                # The backup was removed between listing and stat; skip it
                # rather than failing the whole sitemap.
                continue

        if not mtimes:
            return None
        return datetime.fromtimestamp(max(mtimes), tz=UTC)
class GameSitemap(Sitemap):
    """Sitemap section with one entry per game."""

    def items(self) -> QuerySet[Game]:
        """Return all games, restricted to the fields this sitemap reads."""
        # `twitch_id` builds the URL and `updated_at` feeds lastmod.
        games: QuerySet[Game] = Game.objects.only("twitch_id", "updated_at")
        return games

    def lastmod(self, obj: Game) -> datetime | None:
        """Return the game's ``updated_at`` as its last modified time."""
        return obj.updated_at

    def location(self, obj: Game) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the URL of the game's detail view."""
        detail_url: str = reverse("twitch:game_detail", args=(obj.twitch_id,))
        return detail_url
class CampaignSitemap(Sitemap):
    """Sitemap section with one entry per drop campaign."""

    def items(self) -> QuerySet[DropCampaign]:
        """Return all drop campaigns, restricted to the fields this sitemap reads."""
        # `twitch_id` builds the URL and `updated_at` feeds lastmod.
        campaigns: QuerySet[DropCampaign] = DropCampaign.objects.only("twitch_id", "updated_at")
        return campaigns

    def lastmod(self, obj: DropCampaign) -> datetime | None:
        """Return the campaign's ``updated_at`` as its last modified time."""
        return obj.updated_at

    def location(self, obj: DropCampaign) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the URL of the drop campaign's detail view."""
        detail_url: str = reverse("twitch:campaign_detail", args=(obj.twitch_id,))
        return detail_url
class OrganizationSitemap(Sitemap):
    """Sitemap section with one entry per organization."""

    def items(self) -> QuerySet[Organization]:
        """Return all organizations, restricted to the fields this sitemap reads."""
        # `twitch_id` builds the URL and `updated_at` feeds lastmod.
        organizations: QuerySet[Organization] = Organization.objects.only("twitch_id", "updated_at")
        return organizations

    def lastmod(self, obj: Organization) -> datetime | None:
        """Return the organization's ``updated_at`` as its last modified time."""
        return obj.updated_at

    def location(self, obj: Organization) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the URL of the organization's detail view."""
        detail_url: str = reverse("twitch:organization_detail", args=(obj.twitch_id,))
        return detail_url
class ChannelSitemap(Sitemap):
    """Sitemap section with one entry per channel."""

    def items(self) -> QuerySet[Channel]:
        """Return all channels, restricted to the fields this sitemap reads."""
        # `twitch_id` builds the URL and `updated_at` feeds lastmod.
        channels: QuerySet[Channel] = Channel.objects.only("twitch_id", "updated_at")
        return channels

    def lastmod(self, obj: Channel) -> datetime | None:
        """Return the channel's ``updated_at`` as its last modified time."""
        return obj.updated_at

    def location(self, obj: Channel) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the URL of the channel's detail view."""
        detail_url: str = reverse("twitch:channel_detail", args=(obj.twitch_id,))
        return detail_url
class BadgeSitemap(Sitemap):
    """Sitemap for chat badge sets."""

    def items(self) -> QuerySet[ChatBadgeSet]:
        """Return queryset of chat badge sets to include in sitemap.

        Only the fields read by this sitemap are loaded: ``set_id`` for the
        URL and ``updated_at`` for lastmod.
        """
        # `updated_at` must be listed here: lastmod() reads it for every badge
        # set, and a deferred field would be fetched with one extra query per
        # object.
        return ChatBadgeSet.objects.all().only("set_id", "updated_at")

    def lastmod(self, obj: ChatBadgeSet) -> datetime | None:
        """Return last modified time for a given badge set."""
        return obj.updated_at

    def location(self, obj: ChatBadgeSet) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return URL for a given chat badge set."""
        return reverse("twitch:badge_set_detail", args=[obj.set_id])
class RewardCampaignSitemap(Sitemap):
    """Sitemap section with one entry per reward campaign."""

    def items(self) -> QuerySet[RewardCampaign]:
        """Return all reward campaigns, restricted to the fields this sitemap reads."""
        # `twitch_id` builds the URL and `updated_at` feeds lastmod.
        reward_campaigns: QuerySet[RewardCampaign] = RewardCampaign.objects.only("twitch_id", "updated_at")
        return reward_campaigns

    def lastmod(self, obj: RewardCampaign) -> datetime | None:
        """Return the reward campaign's ``updated_at`` as its last modified time."""
        return obj.updated_at

    def location(self, obj: RewardCampaign) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the URL of the reward campaign's detail view."""
        detail_url: str = reverse("twitch:reward_campaign_detail", args=(obj.twitch_id,))
        return detail_url