Files
twitch-drop-notifier/twitch_app/management/commands/scrape_twitch.py

220 lines
7.9 KiB
Python

import asyncio
import logging
import typing
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING
from asgiref.sync import sync_to_async
from django.core.management.base import BaseCommand
from platformdirs import user_data_dir
from playwright.async_api import Playwright, async_playwright
from playwright.async_api._generated import Response
from twitch_app.models import Game, Image, Reward, RewardCampaign, UnlockRequirements
if TYPE_CHECKING:
from playwright.async_api._generated import BrowserContext, Page
# Where to store the Chrome profile.
#
# ``user_data_dir`` returns a plain string. It is validated *before* being
# wrapped in ``Path`` because a ``Path`` instance is always truthy
# (``Path("")`` becomes ``Path(".")``), so a truthiness check on the wrapped
# value could never fail.
_data_dir_str: str = user_data_dir(
    appname="TTVDrops",
    appauthor="TheLovinator",
    roaming=True,
    ensure_exists=True,
)
if not _data_dir_str:
    msg = "Could not determine the user data directory for the Chrome profile"
    raise ValueError(msg)

data_dir = Path(_data_dir_str)

# Module-level logger shared by everything in this file.
logger: logging.Logger = logging.getLogger(__name__)
def _parse_twitch_timestamp(value: str) -> datetime:
    """Convert an ISO 8601 timestamp string from the response into a datetime.

    A trailing ``Z`` is rewritten to ``+00:00`` because
    ``datetime.fromisoformat`` only accepts the ``Z`` suffix on Python 3.11+.
    """
    return datetime.fromisoformat(value.replace("Z", "+00:00"))


async def _get_or_create_image(image_data: dict) -> "Image":
    """Return the Image row for ``image_data``, creating it if it is missing.

    Rows are looked up by their ``image1xURL`` value.
    """
    image, _ = await sync_to_async(Image.objects.get_or_create)(
        image1_x_url=image_data["image1xURL"],
        defaults={"typename": image_data["__typename"]},
    )
    return image


async def add_reward_campaign(json_data: dict) -> None:
    """Add data from JSON to the database.

    Every entry of ``json_data["data"]["rewardCampaignsAvailableToUser"]`` is
    stored as a ``RewardCampaign`` together with its ``Game``, ``Image``,
    ``Reward`` and ``UnlockRequirements`` rows.

    Rows that have an ``id`` are looked up by that ``id`` only; every other
    field is passed through ``defaults``. Passing mutable fields (status,
    dates, URLs, ...) as lookup arguments would make ``get_or_create`` miss the
    existing row as soon as one of them changes, and then try to insert a
    second row with the same ``id``.
    NOTE(review): this assumes ``id`` is unique on these models - confirm
    against ``twitch_app.models``. Existing rows are returned unchanged; use
    ``update_or_create`` instead if they should be refreshed on every scrape.

    Args:
        json_data: Decoded response body containing
            ``data.rewardCampaignsAvailableToUser``.

    Raises:
        KeyError: If an expected key is missing from ``json_data``.
    """
    for campaign_data in json_data["data"]["rewardCampaignsAvailableToUser"]:
        # Add or get Game
        game_data = campaign_data["game"]
        if game_data:
            game, _ = await sync_to_async(Game.objects.get_or_create)(
                id=game_data["id"],
                defaults={
                    "slug": game_data["slug"],
                    "display_name": game_data["displayName"],
                    "typename": game_data["__typename"],
                },
            )
        else:
            logger.warning("%s is not for a game?", campaign_data["name"])
            game = None

        # Add or get Image
        image = await _get_or_create_image(campaign_data["image"])

        # Create Reward instances
        rewards = []
        for reward_data in campaign_data["rewards"]:
            banner_image = await _get_or_create_image(reward_data["bannerImage"])
            thumbnail_image = await _get_or_create_image(reward_data["thumbnailImage"])

            reward, _ = await sync_to_async(Reward.objects.get_or_create)(
                id=reward_data["id"],
                defaults={
                    "name": reward_data["name"],
                    "banner_image": banner_image,
                    "thumbnail_image": thumbnail_image,
                    "earnable_until": _parse_twitch_timestamp(reward_data["earnableUntil"]),
                    "redemption_instructions": reward_data["redemptionInstructions"],
                    "redemption_url": reward_data["redemptionURL"],
                    "typename": reward_data["__typename"],
                },
            )
            rewards.append(reward)

        # Add or get Unlock Requirements
        # NOTE(review): the resulting row is not attached to the campaign
        # anywhere in this function - confirm whether that is intended.
        unlock_requirements_data = campaign_data["unlockRequirements"]
        await sync_to_async(UnlockRequirements.objects.get_or_create)(
            subs_goal=unlock_requirements_data["subsGoal"],
            defaults={
                "minute_watched_goal": unlock_requirements_data["minuteWatchedGoal"],
                "typename": unlock_requirements_data["__typename"],
            },
        )

        # Create Reward Campaign
        reward_campaign, _ = await sync_to_async(RewardCampaign.objects.get_or_create)(
            id=campaign_data["id"],
            defaults={
                "name": campaign_data["name"],
                "brand": campaign_data["brand"],
                "starts_at": _parse_twitch_timestamp(campaign_data["startsAt"]),
                "ends_at": _parse_twitch_timestamp(campaign_data["endsAt"]),
                "status": campaign_data["status"],
                "summary": campaign_data["summary"],
                "instructions": campaign_data["instructions"],
                "external_url": campaign_data["externalURL"],
                "reward_value_url_param": campaign_data["rewardValueURLParam"],
                "about_url": campaign_data["aboutURL"],
                "is_sitewide": campaign_data["isSitewide"],
                "game": game,
                "image": image,
                "typename": campaign_data["__typename"],
            },
        )

        # Add Rewards to the Campaign in a single call instead of one call
        # per reward.
        if rewards:
            await sync_to_async(reward_campaign.rewards.add)(*rewards)
        await sync_to_async(reward_campaign.save)()
class Command(BaseCommand):
    """Management command that scrapes reward campaigns from Twitch.

    A Chrome window with a persistent profile is opened on the Twitch drops
    campaigns page. Once the logged-in avatar is visible, every response from
    ``https://gql.twitch.tv/gql`` that was recorded is inspected and reward
    campaigns are stored through ``add_reward_campaign``.
    """

    help = "Scrape Twitch Drops Campaigns with login using Chrome"

    async def run(  # noqa: PLR6301, C901
        self,
        playwright: Playwright,
    ) -> list[dict[str, typing.Any]]:
        """Collect GQL responses from the drops page and store reward campaigns.

        Args:
            playwright: A started Playwright instance used to launch Chrome.

        Returns:
            The decoded GQL response bodies that were recorded while the page
            was open.

        Raises:
            NotImplementedError: If a response contains
                ``data.user.dropCampaigns``.
        """
        # Disable the navigator.webdriver:true flag
        args = ["--disable-blink-features=AutomationControlled"]

        profile_dir: Path = Path(data_dir / "chrome-profile")
        profile_dir.mkdir(parents=True, exist_ok=True)
        logger.debug(
            "Launching Chrome browser with user data directory: %s",
            profile_dir,
        )

        browser: BrowserContext = await playwright.chromium.launch_persistent_context(
            channel="chrome",
            user_data_dir=profile_dir,
            headless=False,
            args=args,
        )
        logger.debug("Launched Chrome browser")

        json_data: list[dict] = []

        async def handle_response(response: Response) -> None:
            """Record the decoded body of every Twitch GQL response."""
            if "https://gql.twitch.tv/gql" not in response.url:
                return

            try:
                body: typing.Any = await response.json()
            except Exception:
                logger.exception(
                    "Failed to parse JSON from %s",
                    response.url,
                )
                return

            # A body may be a list of results or a single result object.
            # Calling ``extend`` with a dict would add its string keys instead
            # of the response itself, so the two shapes are handled separately.
            if isinstance(body, list):
                json_data.extend(body)
            elif isinstance(body, dict):
                json_data.append(body)
            else:
                logger.debug(
                    "Ignoring unexpected GQL body of type %s from %s",
                    type(body).__name__,
                    response.url,
                )

        # Close the browser even when navigation or the login wait fails, so
        # no Chrome process is left holding the profile directory.
        try:
            page: Page = await browser.new_page()
            page.on("response", handle_response)
            await page.goto("https://www.twitch.tv/drops/campaigns")
            logger.debug("Navigated to Twitch drops campaigns page")

            # The avatar in the top navigation only exists for logged-in users.
            # KeyboardInterrupt is not an ``Exception`` subclass, so it passes
            # through this handler without needing its own ``except`` clause.
            # NOTE(review): every error is retried, including a browser window
            # that was closed by hand, in which case this loop never ends.
            while True:
                try:
                    await page.wait_for_selector(
                        'div[data-a-target="top-nav-avatar"]',
                        timeout=300000,
                    )
                except Exception:  # noqa: BLE001
                    await asyncio.sleep(5)
                    logger.info("Waiting for login")
                else:
                    logger.info("Logged in to Twitch")
                    break

            await page.wait_for_load_state("networkidle")
            logger.debug("Page loaded. Scraping data...")
        finally:
            await browser.close()

        total = len(json_data)
        for num, campaign in enumerate(json_data, start=1):
            logger.info("Processing JSON %d of %d", num, total)
            if not isinstance(campaign, dict):
                continue

            # Responses may lack "data" or carry a JSON null for it.
            data = campaign.get("data")
            if not isinstance(data, dict):
                continue

            # Skip a missing, null or empty campaign list.
            if data.get("rewardCampaignsAvailableToUser"):
                await add_reward_campaign(campaign)

            user = data.get("user")
            if not isinstance(user, dict):
                continue

            if "dropCampaign" in user and not user["dropCampaign"]:
                continue

            if "dropCampaigns" in user:
                msg = "Multiple dropCampaigns not supported"
                raise NotImplementedError(msg)

        return json_data

    def handle(self, *args, **kwargs) -> None:  # noqa: ANN002, ARG002, ANN003
        """Entry point of the command: run the scraper to completion."""
        asyncio.run(self.run_with_playwright())

    async def run_with_playwright(self) -> None:
        """Start Playwright, run the scraper and shut Playwright down again."""
        async with async_playwright() as playwright:
            await self.run(playwright)
if __name__ == "__main__":
    # Run the scraper when this file is executed directly as a script.
    command = Command()
    command.handle()