This commit is contained in:
parent
4d53a46850
commit
415dd12fd9
16 changed files with 843 additions and 379 deletions
243
twitch/sitemaps.py
Normal file
243
twitch/sitemaps.py
Normal file
|
|
@ -0,0 +1,243 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TypedDict
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.sitemaps import Sitemap
|
||||
from django.db.models import Max
|
||||
from django.db.models import Prefetch
|
||||
from django.db.models.query import QuerySet
|
||||
from django.urls import reverse
|
||||
|
||||
from twitch.models import Channel
|
||||
from twitch.models import ChatBadgeSet
|
||||
from twitch.models import DropBenefit
|
||||
from twitch.models import DropCampaign
|
||||
from twitch.models import Game
|
||||
from twitch.models import Organization
|
||||
from twitch.models import RewardCampaign
|
||||
from twitch.models import TimeBasedDrop
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from django.db.models import QuerySet
|
||||
|
||||
|
||||
class EmoteDict(TypedDict):
    """Pairing of an emote image with the drop campaign it was found through.

    TwitchSitemapGenerator collects these entries while working out the lastmod
    value for the emote gallery sitemap item.
    """

    # URL of the emote image.
    image_url: str
    # Drop campaign associated with the emote.
    campaign: DropCampaign
|
||||
|
||||
|
||||
class TwitchSitemapGenerator(Sitemap):
    """Sitemap for static views on the Twitch site.

    Every item is a URL pattern name. ``lastmod`` maps each name to a timestamp
    source: a model ``updated_at`` aggregate, the newest dataset backup file, or
    ``None`` when no timestamp is reported.
    """

    def items(self) -> list[str]:
        """Return list of URL pattern names to include in sitemap."""
        # names used in `twitch/urls.py`
        return [
            "twitch:dashboard",
            "twitch:campaign_list",
            "twitch:reward_campaign_list",
            "twitch:games_grid",
            "twitch:games_list",
            "twitch:org_list",
            "twitch:channel_list",
            "twitch:badge_list",
            "twitch:emote_gallery",
            "twitch:search",
            "twitch:dataset_backups",
            "twitch:docs_rss",
        ]

    def location(self, item: str) -> str:
        """Return URL for a given item (URL pattern name)."""
        return reverse(item)

    def lastmod(self, item: str) -> datetime | None:  # noqa: PLR0911
        """Return latest modified time across models relevant to static views.

        Args:
            item: URL pattern name, as returned by ``items``.

        Returns:
            The most recent modification time for the data behind ``item``, or
            ``None`` when there is no data or no timestamp is reported for it.
        """
        if item in {"twitch:dashboard", "twitch:campaign_list"}:
            return self._latest_update(DropCampaign)

        if item == "twitch:reward_campaign_list":
            return self._latest_update(RewardCampaign)

        if item in {"twitch:games_grid", "twitch:games_list"}:
            return self._latest_update(Game)

        if item == "twitch:org_list":
            return self._latest_update(Organization)

        if item == "twitch:channel_list":
            # TODO(TheLovinator): This page is paginated, so we should not # noqa: TD003
            # return the latest updated_at across all channels, as that would
            # cause the entire sitemap to be re-crawled whenever any channel is updated
            # Instead, we should consider only returning the latest updated_at across
            # channels included in the first page of results, or implementing a more
            # sophisticated approach to ensure we don't trigger excessive re-crawling
            # while still keeping the sitemap reasonably up to date.
            # return Channel.objects.aggregate(latest=Max("updated_at"))["latest"]
            return None

        if item == "twitch:badge_list":
            return self._latest_update(ChatBadgeSet)

        if item == "twitch:emote_gallery":
            return self._emote_gallery_lastmod()

        if item == "twitch:dataset_backups":
            return self._dataset_backups_lastmod()

        # "twitch:search", "twitch:docs_rss" and any unknown name report no lastmod.
        return None

    @staticmethod
    def _latest_update(model: type) -> datetime | None:
        """Return the maximum ``updated_at`` across all rows of ``model`` (``None`` if empty)."""
        return model.objects.aggregate(latest=Max("updated_at"))["latest"]

    @staticmethod
    def _emote_gallery_lastmod() -> datetime | None:
        """Return the newest ``updated_at`` among campaigns associated with emote benefits."""
        # TODO(TheLovinator): Refactor this to avoid duplicating code from the emote gallery view. # noqa: TD003
        emote_benefits: QuerySet[DropBenefit, DropBenefit] = (
            DropBenefit.objects
            .filter(distribution_type="EMOTE")
            .select_related()
            .prefetch_related(
                Prefetch(
                    "drops",
                    queryset=TimeBasedDrop.objects.select_related("campaign"),
                    to_attr="_emote_drops",
                ),
            )
        )

        emotes: list[EmoteDict] = []
        for benefit in emote_benefits:
            # Find the first drop with a campaign for this benefit
            drop: TimeBasedDrop | None = next(
                (d for d in getattr(benefit, "_emote_drops", []) if d.campaign),
                None,
            )
            if drop is None:
                continue

            # The generator above only yields drops whose campaign is truthy,
            # so no second campaign check is needed here.
            emotes.append({
                "image_url": benefit.image_best_url,
                "campaign": drop.campaign,
            })

        if not emotes:
            # If there are no emotes, return None to avoid unnecessarily triggering re-crawls of the sitemap
            return None

        # Return the latest updated_at across all campaigns associated with emotes
        return max(emote["campaign"].updated_at for emote in emotes)

    @staticmethod
    def _dataset_backups_lastmod() -> datetime | None:
        """Return the mtime (as an aware UTC datetime) of the newest dataset backup zip."""
        datasets_root: Path = settings.DATA_DIR / "datasets"

        # Read each file's mtime exactly once; `default=None` covers the no-backups case.
        latest_mtime: float | None = max(
            (backup.stat().st_mtime for backup in datasets_root.glob("dataset_backup_*.zip")),
            default=None,
        )
        if latest_mtime is None:
            return None

        return datetime.fromtimestamp(latest_mtime, tz=UTC)
|
||||
|
||||
|
||||
class GameSitemap(Sitemap):
    """Sitemap with one entry per game."""

    def items(self) -> QuerySet[Game]:
        """Return the games to list, loading only the fields this sitemap reads."""
        games = Game.objects.all()
        return games.only("twitch_id", "updated_at")

    def lastmod(self, obj: Game) -> datetime | None:
        """Return the ``updated_at`` value of the given game."""
        last_updated: datetime | None = obj.updated_at
        return last_updated

    def location(self, obj: Game) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the detail URL of the given game."""
        url_args = [obj.twitch_id]
        return reverse("twitch:game_detail", args=url_args)
|
||||
|
||||
|
||||
class CampaignSitemap(Sitemap):
    """Sitemap with one entry per drop campaign."""

    def items(self) -> QuerySet[DropCampaign]:
        """Return the drop campaigns to list, loading only the fields this sitemap reads."""
        campaigns = DropCampaign.objects.all()
        return campaigns.only("twitch_id", "updated_at")

    def lastmod(self, obj: DropCampaign) -> datetime | None:
        """Return the ``updated_at`` value of the given drop campaign."""
        last_updated: datetime | None = obj.updated_at
        return last_updated

    def location(self, obj: DropCampaign) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the detail URL of the given drop campaign."""
        url_args = [obj.twitch_id]
        return reverse("twitch:campaign_detail", args=url_args)
|
||||
|
||||
|
||||
class OrganizationSitemap(Sitemap):
    """Sitemap with one entry per organization."""

    def items(self) -> QuerySet[Organization]:
        """Return the organizations to list, loading only the fields this sitemap reads."""
        organizations = Organization.objects.all()
        return organizations.only("twitch_id", "updated_at")

    def lastmod(self, obj: Organization) -> datetime | None:
        """Return the ``updated_at`` value of the given organization."""
        last_updated: datetime | None = obj.updated_at
        return last_updated

    def location(self, obj: Organization) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the detail URL of the given organization."""
        url_args = [obj.twitch_id]
        return reverse("twitch:organization_detail", args=url_args)
|
||||
|
||||
|
||||
class ChannelSitemap(Sitemap):
    """Sitemap with one entry per channel."""

    def items(self) -> QuerySet[Channel]:
        """Return the channels to list, loading only the fields this sitemap reads."""
        channels = Channel.objects.all()
        return channels.only("twitch_id", "updated_at")

    def lastmod(self, obj: Channel) -> datetime | None:
        """Return the ``updated_at`` value of the given channel."""
        last_updated: datetime | None = obj.updated_at
        return last_updated

    def location(self, obj: Channel) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the detail URL of the given channel."""
        url_args = [obj.twitch_id]
        return reverse("twitch:channel_detail", args=url_args)
|
||||
|
||||
|
||||
class BadgeSitemap(Sitemap):
    """Sitemap for chat badge sets."""

    def items(self) -> QuerySet[ChatBadgeSet]:
        """Return queryset of chat badge sets to include in sitemap.

        Both ``set_id`` (read by ``location``) and ``updated_at`` (read by
        ``lastmod``) are loaded up front. Leaving ``updated_at`` out of ``only()``
        would defer it and cost one extra query per badge set when ``lastmod``
        reads it.
        """
        return ChatBadgeSet.objects.all().only("set_id", "updated_at")

    def lastmod(self, obj: ChatBadgeSet) -> datetime | None:
        """Return last modified time for a given badge set."""
        return obj.updated_at

    def location(self, obj: ChatBadgeSet) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return URL for a given chat badge set."""
        return reverse("twitch:badge_set_detail", args=[obj.set_id])
|
||||
|
||||
|
||||
class RewardCampaignSitemap(Sitemap):
    """Sitemap with one entry per reward campaign."""

    def items(self) -> QuerySet[RewardCampaign]:
        """Return the reward campaigns to list, loading only the fields this sitemap reads."""
        reward_campaigns = RewardCampaign.objects.all()
        return reward_campaigns.only("twitch_id", "updated_at")

    def lastmod(self, obj: RewardCampaign) -> datetime | None:
        """Return the ``updated_at`` value of the given reward campaign."""
        last_updated: datetime | None = obj.updated_at
        return last_updated

    def location(self, obj: RewardCampaign) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Return the detail URL of the given reward campaign."""
        url_args = [obj.twitch_id]
        return reverse("twitch:reward_campaign_detail", args=url_args)
|
||||
Loading…
Add table
Add a link
Reference in a new issue