"""Sitemap classes for the Twitch app: static views plus per-object detail pages."""

from __future__ import annotations

from datetime import UTC
from datetime import datetime
from typing import TYPE_CHECKING
from typing import TypedDict

from django.conf import settings
from django.contrib.sitemaps import Sitemap
from django.db.models import Max
from django.db.models import Prefetch
from django.db.models.query import QuerySet
from django.urls import reverse

from twitch.models import Channel
from twitch.models import ChatBadgeSet
from twitch.models import DropBenefit
from twitch.models import DropCampaign
from twitch.models import Game
from twitch.models import Organization
from twitch.models import RewardCampaign
from twitch.models import TimeBasedDrop

if TYPE_CHECKING:
    from pathlib import Path

    from django.db.models import QuerySet


class EmoteDict(TypedDict):
    """Type definition for emote dictionary.

    Used in TwitchSitemapGenerator to track emotes and their associated campaigns
    when calculating lastmod for the emote gallery sitemap item.
    """

    image_url: str
    campaign: DropCampaign


def _emote_gallery_lastmod() -> datetime | None:
    """Return the newest ``updated_at`` among campaigns that back an emote benefit.

    For every ``DropBenefit`` with ``distribution_type="EMOTE"`` the first related
    drop that has a campaign is picked, and the maximum ``updated_at`` of those
    campaigns is returned.

    Returns:
        The latest campaign ``updated_at``, or ``None`` when no emote benefit is
        linked to a campaign (so the sitemap entry carries no lastmod).
    """
    # TODO(TheLovinator): Refactor this to avoid duplicating code from the emote gallery view.  # noqa: TD003
    emote_benefits: QuerySet[DropBenefit, DropBenefit] = (
        DropBenefit.objects
        .filter(distribution_type="EMOTE")
        .select_related()
        .prefetch_related(
            Prefetch(
                "drops",
                queryset=TimeBasedDrop.objects.select_related("campaign"),
                to_attr="_emote_drops",
            ),
        )
    )

    emotes: list[EmoteDict] = []
    for benefit in emote_benefits:
        # Find the first drop with a campaign for this benefit. The generator
        # already filters on ``d.campaign``, so no second check is needed below.
        drop: TimeBasedDrop | None = next(
            (d for d in getattr(benefit, "_emote_drops", []) if d.campaign),
            None,
        )
        if not drop:
            continue
        emotes.append({
            "image_url": benefit.image_best_url,
            "campaign": drop.campaign,
        })

    if not emotes:
        # No emotes: return None to avoid unnecessarily triggering re-crawls of the sitemap.
        return None

    # Latest updated_at across all campaigns associated with emotes.
    return max(emote["campaign"].updated_at for emote in emotes)


def _dataset_backups_lastmod() -> datetime | None:
    """Return the modification time of the newest dataset backup archive.

    Looks for ``dataset_backup_*.zip`` files in ``settings.DATA_DIR / "datasets"``.

    Returns:
        A timezone-aware (UTC) datetime built from the newest file's mtime, or
        ``None`` when no backup file is found.
    """
    datasets_root: Path = settings.DATA_DIR / "datasets"

    # Stat each file exactly once and keep only the timestamps.
    mtimes: list[float] = [backup.stat().st_mtime for backup in datasets_root.glob("dataset_backup_*.zip")]
    if not mtimes:
        return None
    return datetime.fromtimestamp(max(mtimes), tz=UTC)


class TwitchSitemapGenerator(Sitemap):
    """Sitemap for static views on the Twitch site."""

    def items(self) -> list[str]:
        """Return list of URL pattern names to include in sitemap."""
        # names used in `twitch/urls.py`
        return [
            "twitch:dashboard",
            "twitch:campaign_list",
            "twitch:reward_campaign_list",
            "twitch:games_grid",
            "twitch:games_list",
            "twitch:org_list",
            "twitch:channel_list",
            "twitch:badge_list",
            "twitch:emote_gallery",
            "twitch:search",
            "twitch:dataset_backups",
            "twitch:docs_rss",
        ]

    def location(self, item: str) -> str:
        """Return URL for a given item (URL pattern name)."""
        return reverse(item)

    def lastmod(self, item: str) -> datetime | None:  # noqa: PLR0911
        """Return latest modified time across models relevant to static views.

        Args:
            item: URL pattern name, one of the values returned by ``items()``.

        Returns:
            The newest ``updated_at`` of the model(s) shown on that page, the mtime
            of the newest dataset backup for ``twitch:dataset_backups``, or ``None``
            for pages without a meaningful modification time (and unknown names).
        """
        if item in {"twitch:dashboard", "twitch:campaign_list"}:
            return DropCampaign.objects.aggregate(latest=Max("updated_at"))["latest"]

        if item == "twitch:reward_campaign_list":
            return RewardCampaign.objects.aggregate(latest=Max("updated_at"))["latest"]

        if item in {"twitch:games_grid", "twitch:games_list"}:
            return Game.objects.aggregate(latest=Max("updated_at"))["latest"]

        if item == "twitch:org_list":
            return Organization.objects.aggregate(latest=Max("updated_at"))["latest"]

        if item == "twitch:channel_list":
            # TODO(TheLovinator): This page is paginated, so we should not  # noqa: TD003
            # return the latest updated_at across all channels, as that would
            # cause the entire sitemap to be re-crawled whenever any channel is updated
            # Instead, we should consider only returning the latest updated_at across
            # channels included in the first page of results, or implementing a more
            # sophisticated approach to ensure we don't trigger excessive re-crawling
            # while still keeping the sitemap reasonably up to date.
            # return Channel.objects.aggregate(latest=Max("updated_at"))["latest"]
            return None

        if item == "twitch:badge_list":
            return ChatBadgeSet.objects.aggregate(latest=Max("updated_at"))["latest"]

        if item == "twitch:emote_gallery":
            return _emote_gallery_lastmod()

        if item == "twitch:dataset_backups":
            return _dataset_backups_lastmod()

        # "twitch:search", "twitch:docs_rss" and any unknown name have no lastmod.
        return None


class GameSitemap(Sitemap):
    """Sitemap for games."""

    def items(self) -> QuerySet[Game]:
        """Return queryset of games to include in sitemap."""
        return Game.objects.all().only("twitch_id", "updated_at")

    def lastmod(self, obj: Game) -> datetime | None:
        """Return last modified time for a given game."""
        return obj.updated_at

    def location(self, obj: Game) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return URL for a given game."""
        return reverse("twitch:game_detail", args=[obj.twitch_id])


class CampaignSitemap(Sitemap):
    """Sitemap for drop campaigns."""

    def items(self) -> QuerySet[DropCampaign]:
        """Return queryset of drop campaigns to include in sitemap."""
        return DropCampaign.objects.all().only("twitch_id", "updated_at")

    def lastmod(self, obj: DropCampaign) -> datetime | None:
        """Return last modified time for a given drop campaign."""
        return obj.updated_at

    def location(self, obj: DropCampaign) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return URL for a given drop campaign."""
        return reverse("twitch:campaign_detail", args=[obj.twitch_id])


class OrganizationSitemap(Sitemap):
    """Sitemap for organizations."""

    def items(self) -> QuerySet[Organization]:
        """Return queryset of organizations to include in sitemap."""
        return Organization.objects.all().only("twitch_id", "updated_at")

    def lastmod(self, obj: Organization) -> datetime | None:
        """Return last modified time for a given organization."""
        return obj.updated_at

    def location(self, obj: Organization) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return URL for a given organization."""
        return reverse("twitch:organization_detail", args=[obj.twitch_id])


class ChannelSitemap(Sitemap):
    """Sitemap for individual channels."""

    def items(self) -> QuerySet[Channel]:
        """Return queryset of channels to include in sitemap."""
        return Channel.objects.all().only("twitch_id", "updated_at")

    def lastmod(self, obj: Channel) -> datetime | None:
        """Return last modified time for a given channel."""
        return obj.updated_at

    def location(self, obj: Channel) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return URL for a given channel."""
        return reverse("twitch:channel_detail", args=[obj.twitch_id])


class BadgeSitemap(Sitemap):
    """Sitemap for chat badge sets."""

    def items(self) -> QuerySet[ChatBadgeSet]:
        """Return queryset of chat badge sets to include in sitemap."""
        # ``updated_at`` must be loaded here: ``lastmod`` reads it, and leaving it
        # deferred would cost one extra query per badge set.
        return ChatBadgeSet.objects.all().only("set_id", "updated_at")

    def lastmod(self, obj: ChatBadgeSet) -> datetime | None:
        """Return last modified time for a given badge set."""
        return obj.updated_at

    def location(self, obj: ChatBadgeSet) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return URL for a given chat badge set."""
        return reverse("twitch:badge_set_detail", args=[obj.set_id])


class RewardCampaignSitemap(Sitemap):
    """Sitemap for reward campaigns."""

    def items(self) -> QuerySet[RewardCampaign]:
        """Return queryset of reward campaigns to include in sitemap."""
        return RewardCampaign.objects.all().only("twitch_id", "updated_at")

    def lastmod(self, obj: RewardCampaign) -> datetime | None:
        """Return last modified time for a given reward campaign."""
        return obj.updated_at

    def location(self, obj: RewardCampaign) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return URL for a given reward campaign."""
        return reverse("twitch:reward_campaign_detail", args=[obj.twitch_id])