This commit is contained in:
2024-07-01 05:56:36 +02:00
parent 3f7bacca2f
commit 219aee31af
30 changed files with 679 additions and 263 deletions

0
twitch_app/__init__.py Normal file
View File

9
twitch_app/admin.py Normal file
View File

@ -0,0 +1,9 @@
from django.contrib import admin
from .models import DropBenefit, DropCampaign, Game, Organization, TimeBasedDrop
# Expose every Twitch model in the Django admin with the default ModelAdmin.
# AdminSite.register accepts an iterable and registers the models in order.
admin.site.register(
    [
        DropBenefit,
        DropCampaign,
        Game,
        Organization,
        TimeBasedDrop,
    ],
)

125
twitch_app/api.py Normal file
View File

@ -0,0 +1,125 @@
import datetime
from django.db.models.manager import BaseManager
from django.http import HttpRequest
from ninja import Router, Schema
from .models import (
DropBenefit,
DropCampaign,
Game,
Organization,
TimeBasedDrop,
)
# Router holding every endpoint defined in this module, tagged "twitch".
router = Router(tags=["twitch"])
# Response schema for an Organization: used directly by /organizations and
# nested inside DropBenefitSchema and DropCampaignSchema.
# Every field is optional and defaults to None.
class OrganizationSchema(Schema):
    id: str | None = None
    name: str | None = None
    # Bookkeeping timestamps.
    added_at: datetime.datetime | None = None
    modified_at: datetime.datetime | None = None
# Schema for a channel; only referenced by DropCampaignSchema.channels.
# NOTE(review): no Channel model is imported in this module - confirm where
# channel data is expected to come from.
class ChannelSchema(Schema):
    # The only required field.
    id: str
    display_name: str | None = None
    name: str | None = None
    # Bookkeeping timestamps.
    added_at: datetime.datetime | None = None
    modified_at: datetime.datetime | None = None
# Response schema for a Game: used directly by /games and nested inside
# DropBenefitSchema and DropCampaignSchema.
# NOTE(review): the Game model also defines a generated image_url column that
# is not exposed here - confirm whether that is intentional.
class GameSchema(Schema):
    # The only required field.
    id: str
    slug: str | None = None
    twitch_url: str | None = None
    display_name: str | None = None
    # Bookkeeping timestamps.
    added_at: datetime.datetime | None = None
    modified_at: datetime.datetime | None = None
# Response schema for a DropBenefit: used directly by /drop_benefits and
# nested inside TimeBasedDropSchema.benefits.
class DropBenefitSchema(Schema):
    id: str
    created_at: datetime.datetime | None = None
    entitlement_limit: int | None = None
    image_asset_url: str | None = None
    is_ios_available: bool | None = None
    name: str | None = None
    # Required nested objects (no default), unlike the optional scalars above.
    owner_organization: OrganizationSchema
    game: GameSchema
    # Bookkeeping timestamps.
    added_at: datetime.datetime | None = None
    modified_at: datetime.datetime | None = None
# Response schema for a TimeBasedDrop: used directly by /time_based_drops and
# nested inside DropCampaignSchema.time_based_drops.
class TimeBasedDropSchema(Schema):
    id: str
    required_subs: int | None = None
    end_at: datetime.datetime | None = None
    name: str | None = None
    required_minutes_watched: int | None = None
    start_at: datetime.datetime | None = None
    # Required list (no default) of the benefits attached to this drop.
    benefits: list[DropBenefitSchema]
    # Bookkeeping timestamps.
    added_at: datetime.datetime | None = None
    modified_at: datetime.datetime | None = None
# Response schema for a DropCampaign, used by /drop_campaigns.
# Only id is required; every other field defaults to None.
class DropCampaignSchema(Schema):
    id: str
    account_link_url: str | None = None
    description: str | None = None
    details_url: str | None = None
    end_at: datetime.datetime | None = None
    image_url: str | None = None
    name: str | None = None
    start_at: datetime.datetime | None = None
    status: str | None = None
    # Nested related objects.
    game: GameSchema | None = None
    owner: OrganizationSchema | None = None
    # NOTE(review): the DropCampaign model shown in this commit has no
    # `channels` field, so this presumably always serializes as None - confirm.
    channels: list[ChannelSchema] | None = None
    time_based_drops: list[TimeBasedDropSchema] | None = None
    # Bookkeeping timestamps.
    added_at: datetime.datetime | None = None
    modified_at: datetime.datetime | None = None
# http://localhost:8000/api/twitch/organizations
@router.get("/organizations", response=list[OrganizationSchema])
def get_organizations(request: HttpRequest) -> BaseManager[Organization]:  # noqa: ARG001
    """Get all organizations."""
    # Unfiltered: every Organization row is handed to the response schema.
    organizations = Organization.objects.all()
    return organizations
# http://localhost:8000/api/twitch/games
@router.get("/games", response=list[GameSchema])
def get_games(
    request: HttpRequest,  # noqa: ARG001
) -> BaseManager[Game]:
    """Get all games."""
    # Unfiltered: every Game row is handed to the response schema.
    games = Game.objects.all()
    return games
# http://localhost:8000/api/twitch/drop_benefits
@router.get("/drop_benefits", response=list[DropBenefitSchema])
def get_drop_benefits(
    request: HttpRequest,  # noqa: ARG001
) -> BaseManager[DropBenefit]:
    """Get all drop benefits."""
    # Unfiltered: every DropBenefit row is handed to the response schema.
    drop_benefits = DropBenefit.objects.all()
    return drop_benefits
# http://localhost:8000/api/twitch/drop_campaigns
@router.get("/drop_campaigns", response=list[DropCampaignSchema])
def get_drop_campaigns(request: HttpRequest) -> BaseManager[DropCampaign]:  # noqa: ARG001
    """Get all drop campaigns."""
    # Unfiltered: every DropCampaign row is handed to the response schema.
    drop_campaigns = DropCampaign.objects.all()
    return drop_campaigns
# http://localhost:8000/api/twitch/time_based_drops
@router.get("/time_based_drops", response=list[TimeBasedDropSchema])
def get_time_based_drops(request: HttpRequest) -> BaseManager[TimeBasedDrop]:  # noqa: ARG001
    """Get all time-based drops."""
    # Unfiltered: every TimeBasedDrop row is handed to the response schema.
    time_based_drops = TimeBasedDrop.objects.all()
    return time_based_drops

6
twitch_app/apps.py Normal file
View File

@ -0,0 +1,6 @@
from django.apps import AppConfig
class TwitchConfig(AppConfig):
    """Django application configuration for the ``twitch_app`` package."""

    # Dotted path of the application package.
    name: str = "twitch_app"
    # Field class used for implicit primary keys in this app.
    default_auto_field: str = "django.db.models.BigAutoField"

View File

View File

@ -0,0 +1,245 @@
import asyncio
import logging
import typing
from pathlib import Path
from typing import TYPE_CHECKING
from asgiref.sync import sync_to_async
from django.core.management.base import BaseCommand
from platformdirs import user_data_dir
from playwright.async_api import Playwright, async_playwright
from playwright.async_api._generated import Response
from twitch_app.models import (
DropBenefit,
DropCampaign,
Game,
Organization,
TimeBasedDrop,
User,
)
if TYPE_CHECKING:
from playwright.async_api._generated import BrowserContext, Page
# Where to store the Firefox profile.
# The location comes from platformdirs, not from Django settings, and
# ensure_exists=True is passed so the directory is created on import.
# No emptiness check is needed: a Path instance is always truthy.
data_dir = Path(
    user_data_dir(
        appname="TTVDrops",
        appauthor="TheLovinator",
        roaming=True,
        ensure_exists=True,
    ),
)

# Logger shared by insert_data() and the Command class below.
logger: logging.Logger = logging.getLogger("twitch.management.commands.scrape_twitch")
async def _get_or_create_benefit(benefit_data: dict) -> DropBenefit:
    """Return the DropBenefit for one ``benefit`` payload, creating it, its owner and its game if missing."""
    owner_data = benefit_data["ownerOrganization"]
    benefit_owner, created = await sync_to_async(Organization.objects.get_or_create)(
        id=owner_data["id"],
        defaults={"name": owner_data["name"]},
    )
    if created:
        logger.debug("Benefit owner created: %s", benefit_owner)

    game_data = benefit_data["game"]
    benefit_game, created = await sync_to_async(Game.objects.get_or_create)(
        id=game_data["id"],
        # Game has no ``name`` field; passing one in ``defaults`` makes the
        # create path fail, so the payload's name is stored as display_name.
        defaults={"display_name": game_data["name"]},
    )
    if created:
        logger.debug("Benefit game created: %s", benefit_game)

    benefit, created = await sync_to_async(DropBenefit.objects.get_or_create)(
        id=benefit_data["id"],
        defaults={
            "created_at": benefit_data["createdAt"],
            "entitlement_limit": benefit_data["entitlementLimit"],
            "image_asset_url": benefit_data["imageAssetURL"],
            "is_ios_available": benefit_data["isIosAvailable"],
            "name": benefit_data["name"],
            "owner_organization": benefit_owner,
            "game": benefit_game,
        },
    )
    if created:
        logger.debug("Benefit created: %s", benefit)
    return benefit


async def _get_or_create_time_based_drop(drop_data: dict) -> TimeBasedDrop:
    """Return the TimeBasedDrop for one ``timeBasedDrops`` entry with its benefits attached."""
    # Benefits are resolved first, in payload order, before the drop itself.
    drop_benefits = [
        await _get_or_create_benefit(edge["benefit"])
        for edge in drop_data["benefitEdges"]
    ]

    time_based_drop, created = await sync_to_async(TimeBasedDrop.objects.get_or_create)(
        id=drop_data["id"],
        defaults={
            "required_subs": drop_data["requiredSubs"],
            "end_at": drop_data["endAt"],
            "name": drop_data["name"],
            "required_minutes_watched": drop_data["requiredMinutesWatched"],
            "start_at": drop_data["startAt"],
        },
    )
    # set() replaces the drop's benefit list with exactly the benefits above.
    await sync_to_async(time_based_drop.benefits.set)(drop_benefits)
    if created:
        logger.debug("Time-based drop created: %s", time_based_drop)
    return time_based_drop


async def insert_data(data: dict) -> None:
    """Insert data into the database.

    Every row is looked up by its id and only created when missing
    (``get_or_create``); existing rows keep their stored field values.
    Payloads without a ``data.user.dropCampaign`` entry are ignored.

    Args:
        data: The data from Twitch.

    Raises:
        KeyError: If the drop campaign payload lacks one of the expected keys.
    """
    # ``data`` may be missing or null in the payload, so guard before ``.get``.
    user_data: dict | None = (data.get("data") or {}).get("user")
    if not user_data:
        logger.debug("No user data found")
        return

    # .get(): callers also forward payloads that only contain ``dropCampaigns``.
    drop_campaign_data = user_data.get("dropCampaign")
    if not drop_campaign_data:
        return

    # Read before any database write so a malformed payload fails early.
    user_id = user_data["id"]

    # Create or get the organization
    owner_data = drop_campaign_data["owner"]
    owner, created = await sync_to_async(Organization.objects.get_or_create)(
        id=owner_data["id"],
        defaults={"name": owner_data["name"]},
    )
    if created:
        logger.debug("Organization created: %s", owner)

    # Create or get the game
    game_data = drop_campaign_data["game"]
    game, created = await sync_to_async(Game.objects.get_or_create)(
        id=game_data["id"],
        defaults={
            "slug": game_data["slug"],
            "display_name": game_data["displayName"],
        },
    )
    if created:
        logger.debug("Game created: %s", game)

    # Create the drop campaign
    drop_campaign, created = await sync_to_async(DropCampaign.objects.get_or_create)(
        id=drop_campaign_data["id"],
        defaults={
            "account_link_url": drop_campaign_data["accountLinkURL"],
            "description": drop_campaign_data["description"],
            "details_url": drop_campaign_data["detailsURL"],
            "end_at": drop_campaign_data["endAt"],
            "image_url": drop_campaign_data["imageURL"],
            "name": drop_campaign_data["name"],
            "start_at": drop_campaign_data["startAt"],
            "status": drop_campaign_data["status"],
            "game": game,
            "owner": owner,
        },
    )
    if created:
        logger.debug("Drop campaign created: %s", drop_campaign)

    # Create time-based drops and link each one to the campaign
    for drop_data in drop_campaign_data["timeBasedDrops"]:
        time_based_drop = await _get_or_create_time_based_drop(drop_data)
        await sync_to_async(drop_campaign.time_based_drops.add)(time_based_drop)

    # Create or get the user
    user, created = await sync_to_async(User.objects.get_or_create)(id=user_id)
    await sync_to_async(user.drop_campaigns.add)(drop_campaign)
    if created:
        logger.debug("User created: %s", user)
class Command(BaseCommand):
    """Management command that scrapes Twitch drop campaigns with a persistent Firefox profile."""

    help = "Scrape Twitch Drops Campaigns with login using Firefox"

    async def run(  # noqa: PLR6301, C901
        self,
        playwright: Playwright,
    ) -> list[dict[str, typing.Any]]:
        """Open the Twitch drops page, capture GQL responses and store the campaigns.

        Args:
            playwright: The running Playwright instance used to launch Firefox.

        Returns:
            Every JSON payload captured from ``https://gql.twitch.tv/gql``.
        """
        profile_dir: Path = Path(data_dir / "firefox-profile")
        profile_dir.mkdir(parents=True, exist_ok=True)
        logger.debug(
            "Launching Firefox browser with user data directory: %s",
            profile_dir,
        )

        browser: BrowserContext = await playwright.firefox.launch_persistent_context(
            user_data_dir=profile_dir,
            headless=True,
        )
        logger.debug("Launched Firefox browser")

        json_data: list[dict] = []

        async def handle_response(response: Response) -> None:
            """Collect the JSON body of every Twitch GQL response."""
            if "https://gql.twitch.tv/gql" not in response.url:
                return
            try:
                body: typing.Any = await response.json()
            except Exception:
                logger.exception(
                    "Failed to parse JSON from %s",
                    response.url,
                )
                return
            # A list body contributes each entry. Anything else is stored
            # whole: extend() on a dict would only add its keys as strings.
            if isinstance(body, list):
                json_data.extend(body)
            else:
                json_data.append(body)

        # Close the browser even when navigation or the login wait fails.
        try:
            page: Page = await browser.new_page()
            page.on("response", handle_response)
            await page.goto("https://www.twitch.tv/drops/campaigns")
            logger.debug("Navigated to Twitch drops campaigns page")

            logged_in = False
            while not logged_in:
                try:
                    await page.wait_for_selector(
                        'div[data-a-target="top-nav-avatar"]',
                        timeout=30000,
                    )
                    logged_in = True
                    logger.info("Logged in to Twitch")
                # KeyboardInterrupt is not an Exception subclass, so it
                # propagates on its own and stops the wait.
                except Exception:  # noqa: BLE001
                    await asyncio.sleep(5)
                    logger.info("Waiting for login")

            await page.wait_for_load_state("networkidle")
            logger.debug("Page loaded. Scraping data...")
        finally:
            await browser.close()

        total = len(json_data)
        for num, campaign in enumerate(json_data, start=1):
            logger.info("Processing JSON %d of %d", num, total)
            if not isinstance(campaign, dict):
                continue
            # "data" and "user" may be present but null, so guard both lookups.
            user_data = (campaign.get("data") or {}).get("user") or {}
            # insert_data() reads the singular "dropCampaign" entry, so only
            # payloads carrying it are forwarded, and each exactly once.
            if "dropCampaign" in user_data:
                await insert_data(campaign)
        return json_data

    def handle(self, *args, **kwargs) -> None:  # noqa: ANN002, ARG002, ANN003
        """Entry point of the command: run the scraper to completion."""
        asyncio.run(self.run_with_playwright())

    async def run_with_playwright(self) -> None:
        """Start Playwright, run the scraper and shut Playwright down afterwards."""
        async with async_playwright() as playwright:
            await self.run(playwright)
# Allow running this module directly as a script.
if __name__ == "__main__":
    command = Command()
    command.handle()

View File

@ -0,0 +1,137 @@
# Generated by Django 5.0.6 on 2024-07-01 00:08
import django.db.models.deletion
import django.db.models.functions.text
from django.db import migrations, models
from django.db.migrations.operations.base import Operation
# Initial migration for twitch_app. Creates, in this order, the Game,
# Organization, DropBenefit, TimeBasedDrop, DropCampaign and User tables.
# Every model uses a text primary key plus added_at/modified_at timestamps.
class Migration(migrations.Migration):
    initial = True
    # First migration of the app, so there is nothing to depend on.
    dependencies: list[tuple[str, str]] = []
    operations: list[Operation] = [
        # Game: twitch_url is a persisted generated column built from slug.
        migrations.CreateModel(
            name="Game",
            fields=[
                ("id", models.TextField(primary_key=True, serialize=False)),
                ("slug", models.TextField(blank=True, null=True)),
                (
                    "twitch_url",
                    models.GeneratedField(  # type: ignore  # noqa: PGH003
                        db_persist=True,
                        expression=django.db.models.functions.text.Concat(
                            models.Value("https://www.twitch.tv/directory/category/"),
                            "slug",
                        ),
                        output_field=models.TextField(),
                    ),
                ),
                ("display_name", models.TextField(blank=True, null=True)),
                ("added_at", models.DateTimeField(auto_now_add=True, null=True)),
                ("modified_at", models.DateTimeField(auto_now=True, null=True)),
            ],
        ),
        # Organization: id and optional name.
        migrations.CreateModel(
            name="Organization",
            fields=[
                ("id", models.TextField(primary_key=True, serialize=False)),
                ("name", models.TextField(blank=True, null=True)),
                ("added_at", models.DateTimeField(auto_now_add=True, null=True)),
                ("modified_at", models.DateTimeField(auto_now=True, null=True)),
            ],
        ),
        # DropBenefit: requires a Game and an owning Organization; deleting
        # either one cascades to the benefit.
        migrations.CreateModel(
            name="DropBenefit",
            fields=[
                ("id", models.TextField(primary_key=True, serialize=False)),
                ("created_at", models.DateTimeField(blank=True, null=True)),
                ("entitlement_limit", models.IntegerField(blank=True, null=True)),
                ("image_asset_url", models.URLField(blank=True, null=True)),
                ("is_ios_available", models.BooleanField(blank=True, null=True)),
                ("name", models.TextField(blank=True, null=True)),
                ("added_at", models.DateTimeField(auto_now_add=True, null=True)),
                ("modified_at", models.DateTimeField(auto_now=True, null=True)),
                (
                    "game",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="twitch_app.game",
                    ),
                ),
                (
                    "owner_organization",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="twitch_app.organization",
                    ),
                ),
            ],
        ),
        # TimeBasedDrop: many-to-many with DropBenefit.
        migrations.CreateModel(
            name="TimeBasedDrop",
            fields=[
                ("id", models.TextField(primary_key=True, serialize=False)),
                ("required_subs", models.IntegerField(blank=True, null=True)),
                ("end_at", models.DateTimeField(blank=True, null=True)),
                ("name", models.TextField(blank=True, null=True)),
                (
                    "required_minutes_watched",
                    models.IntegerField(blank=True, null=True),
                ),
                ("start_at", models.DateTimeField(blank=True, null=True)),
                ("added_at", models.DateTimeField(auto_now_add=True, null=True)),
                ("modified_at", models.DateTimeField(auto_now=True, null=True)),
                ("benefits", models.ManyToManyField(to="twitch_app.dropbenefit")),
            ],
        ),
        # DropCampaign: belongs to a Game and an Organization (both reachable
        # in reverse as drop_campaigns) and links to many TimeBasedDrops.
        migrations.CreateModel(
            name="DropCampaign",
            fields=[
                ("id", models.TextField(primary_key=True, serialize=False)),
                ("account_link_url", models.URLField(blank=True, null=True)),
                ("description", models.TextField(blank=True, null=True)),
                ("details_url", models.URLField(blank=True, null=True)),
                ("end_at", models.DateTimeField(blank=True, null=True)),
                ("image_url", models.URLField(blank=True, null=True)),
                ("name", models.TextField(blank=True, null=True)),
                ("start_at", models.DateTimeField(blank=True, null=True)),
                ("status", models.TextField(blank=True, null=True)),
                ("added_at", models.DateTimeField(auto_now_add=True, null=True)),
                ("modified_at", models.DateTimeField(auto_now=True, null=True)),
                (
                    "game",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="drop_campaigns",
                        to="twitch_app.game",
                    ),
                ),
                (
                    "owner",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="drop_campaigns",
                        to="twitch_app.organization",
                    ),
                ),
                (
                    "time_based_drops",
                    models.ManyToManyField(to="twitch_app.timebaseddrop"),
                ),
            ],
        ),
        # User: id plus a many-to-many link to DropCampaign.
        migrations.CreateModel(
            name="User",
            fields=[
                ("id", models.TextField(primary_key=True, serialize=False)),
                ("added_at", models.DateTimeField(auto_now_add=True, null=True)),
                ("modified_at", models.DateTimeField(auto_now=True, null=True)),
                (
                    "drop_campaigns",
                    models.ManyToManyField(to="twitch_app.dropcampaign"),
                ),
            ],
        ),
    ]

View File

@ -0,0 +1,27 @@
# Generated by Django 5.0.6 on 2024-07-01 03:49
import django.db.models.functions.text
from django.db import migrations, models
from django.db.migrations.operations.base import Operation
# Adds the generated image_url column to Game.
class Migration(migrations.Migration):
    # Must run after the initial migration that creates the Game table.
    dependencies: list[tuple[str, str]] = [
        ("twitch_app", "0001_initial"),
    ]
    operations: list[Operation] = [
        # Persisted generated column: box-art URL prefix + id + "_IGDB.jpg".
        migrations.AddField(
            model_name="game",
            name="image_url",
            field=models.GeneratedField(  # type: ignore  # noqa: PGH003
                db_persist=True,
                expression=django.db.models.functions.text.Concat(
                    models.Value("https://static-cdn.jtvnw.net/ttv-boxart/"),
                    "id",
                    models.Value("_IGDB.jpg"),
                ),
                output_field=models.URLField(),
            ),
        ),
    ]

View File

109
twitch_app/models.py Normal file
View File

@ -0,0 +1,109 @@
from django.db import models
from django.db.models import Value
from django.db.models.functions import (
Concat,
)
class Organization(models.Model):
    """An organization, keyed by a text id, with an optional name."""

    id = models.TextField(primary_key=True)
    name = models.TextField(null=True, blank=True)
    # Bookkeeping timestamps maintained by Django on insert / save.
    added_at = models.DateTimeField(auto_now_add=True, null=True, blank=True)
    modified_at = models.DateTimeField(auto_now=True, null=True, blank=True)

    def __str__(self) -> str:
        """Return the name when it is set, otherwise the id."""
        if self.name:
            return self.name
        return self.id
class Game(models.Model):
    """A game, keyed by a text id, with two persisted generated URL columns."""

    id = models.TextField(primary_key=True)
    slug = models.TextField(null=True, blank=True)
    # Generated column: the directory URL prefix followed by the slug.
    twitch_url = models.GeneratedField(  # type: ignore  # noqa: PGH003
        db_persist=True,
        output_field=models.TextField(),
        expression=Concat(
            Value("https://www.twitch.tv/directory/category/"),
            "slug",
        ),
    )
    # Generated column: the box-art URL prefix, the id, then "_IGDB.jpg".
    image_url = models.GeneratedField(  # type: ignore  # noqa: PGH003
        db_persist=True,
        output_field=models.URLField(),
        expression=Concat(
            Value("https://static-cdn.jtvnw.net/ttv-boxart/"),
            "id",
            Value("_IGDB.jpg"),
        ),
    )
    display_name = models.TextField(null=True, blank=True)
    # Bookkeeping timestamps maintained by Django on insert / save.
    added_at = models.DateTimeField(auto_now_add=True, null=True, blank=True)
    modified_at = models.DateTimeField(auto_now=True, null=True, blank=True)

    def __str__(self) -> str:
        """Return the display name, else the slug, else the id."""
        for label in (self.display_name, self.slug):
            if label:
                return label
        return self.id
class DropBenefit(models.Model):
    """A drop benefit that belongs to one organization and one game."""

    id = models.TextField(primary_key=True)
    created_at = models.DateTimeField(null=True, blank=True)
    entitlement_limit = models.IntegerField(null=True, blank=True)
    image_asset_url = models.URLField(null=True, blank=True)
    is_ios_available = models.BooleanField(null=True, blank=True)
    name = models.TextField(null=True, blank=True)
    # Both relations are required; deleting either parent deletes the benefit.
    owner_organization = models.ForeignKey(to=Organization, on_delete=models.CASCADE)
    game = models.ForeignKey(to=Game, on_delete=models.CASCADE)
    # Bookkeeping timestamps maintained by Django on insert / save.
    added_at = models.DateTimeField(auto_now_add=True, null=True, blank=True)
    modified_at = models.DateTimeField(auto_now=True, null=True, blank=True)

    def __str__(self) -> str:
        """Return the name when it is set, otherwise the id."""
        if self.name:
            return self.name
        return self.id
class TimeBasedDrop(models.Model):
    """A time-based drop and the benefits attached to it."""

    id = models.TextField(primary_key=True)
    required_subs = models.IntegerField(null=True, blank=True)
    end_at = models.DateTimeField(null=True, blank=True)
    name = models.TextField(null=True, blank=True)
    required_minutes_watched = models.IntegerField(null=True, blank=True)
    start_at = models.DateTimeField(null=True, blank=True)
    benefits = models.ManyToManyField(to=DropBenefit)
    # Bookkeeping timestamps maintained by Django on insert / save.
    added_at = models.DateTimeField(auto_now_add=True, null=True, blank=True)
    modified_at = models.DateTimeField(auto_now=True, null=True, blank=True)

    def __str__(self) -> str:
        """Return the name when it is set, otherwise the id."""
        if self.name:
            return self.name
        return self.id
class DropCampaign(models.Model):
    """A drop campaign owned by an organization for a single game."""

    id = models.TextField(primary_key=True)
    account_link_url = models.URLField(null=True, blank=True)
    description = models.TextField(null=True, blank=True)
    details_url = models.URLField(null=True, blank=True)
    end_at = models.DateTimeField(null=True, blank=True)
    image_url = models.URLField(null=True, blank=True)
    name = models.TextField(null=True, blank=True)
    start_at = models.DateTimeField(null=True, blank=True)
    status = models.TextField(null=True, blank=True)
    # Both parents expose their campaigns through ``drop_campaigns``.
    game = models.ForeignKey(
        to=Game,
        related_name="drop_campaigns",
        on_delete=models.CASCADE,
    )
    owner = models.ForeignKey(
        to=Organization,
        related_name="drop_campaigns",
        on_delete=models.CASCADE,
    )
    time_based_drops = models.ManyToManyField(to=TimeBasedDrop)
    # Bookkeeping timestamps maintained by Django on insert / save.
    added_at = models.DateTimeField(auto_now_add=True, null=True, blank=True)
    modified_at = models.DateTimeField(auto_now=True, null=True, blank=True)

    def __str__(self) -> str:
        """Return the name when it is set, otherwise the id."""
        if self.name:
            return self.name
        return self.id
class User(models.Model):
    """A user, keyed by a text id, linked to the drop campaigns stored for it."""

    id = models.TextField(primary_key=True)
    drop_campaigns = models.ManyToManyField(to=DropCampaign)
    # Bookkeeping timestamps maintained by Django on insert / save.
    added_at = models.DateTimeField(auto_now_add=True, null=True, blank=True)
    modified_at = models.DateTimeField(auto_now=True, null=True, blank=True)

    def __str__(self) -> str:
        """Return the user's id."""
        user_id = self.id
        return user_id

10
twitch_app/urls.py Normal file
View File

@ -0,0 +1,10 @@
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from django.urls import URLPattern
# URL namespace for this app.
app_name: str = "twitch"
# No URL patterns are declared in this module.
urlpatterns: list[URLPattern] = []