220 lines
7.9 KiB
Python
220 lines
7.9 KiB
Python
import asyncio
|
|
import logging
|
|
import typing
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING
|
|
|
|
from asgiref.sync import sync_to_async
|
|
from django.core.management.base import BaseCommand
|
|
from platformdirs import user_data_dir
|
|
from playwright.async_api import Playwright, async_playwright
|
|
from playwright.async_api._generated import Response
|
|
|
|
from twitch_app.models import Game, Image, Reward, RewardCampaign, UnlockRequirements
|
|
|
|
if TYPE_CHECKING:
|
|
from playwright.async_api._generated import BrowserContext, Page
|
|
|
|
# Where to store the Chrome profile
|
|
data_dir = Path(
|
|
user_data_dir(
|
|
appname="TTVDrops",
|
|
appauthor="TheLovinator",
|
|
roaming=True,
|
|
ensure_exists=True,
|
|
),
|
|
)
|
|
|
|
if not data_dir:
|
|
msg = "DATA_DIR is not set in settings.py"
|
|
raise ValueError(msg)
|
|
|
|
logger: logging.Logger = logging.getLogger(__name__)
|
|
|
|
|
|
async def add_reward_campaign(json_data: dict) -> None:
|
|
"""Add data from JSON to the database."""
|
|
for campaign_data in json_data["data"]["rewardCampaignsAvailableToUser"]:
|
|
# Add or get Game
|
|
game_data = campaign_data["game"]
|
|
if game_data:
|
|
game, _ = await sync_to_async(Game.objects.get_or_create)(
|
|
id=game_data["id"],
|
|
slug=game_data["slug"],
|
|
defaults={
|
|
"display_name": game_data["displayName"],
|
|
"typename": game_data["__typename"],
|
|
},
|
|
)
|
|
else:
|
|
logger.warning("%s is not for a game?", campaign_data["name"])
|
|
game = None
|
|
|
|
# Add or get Image
|
|
image_data = campaign_data["image"]
|
|
image, _ = await sync_to_async(Image.objects.get_or_create)(
|
|
image1_x_url=image_data["image1xURL"],
|
|
defaults={"typename": image_data["__typename"]},
|
|
)
|
|
|
|
# Create Reward instances
|
|
rewards = []
|
|
for reward_data in campaign_data["rewards"]:
|
|
banner_image_data = reward_data["bannerImage"]
|
|
banner_image, _ = await sync_to_async(Image.objects.get_or_create)(
|
|
image1_x_url=banner_image_data["image1xURL"],
|
|
defaults={"typename": banner_image_data["__typename"]},
|
|
)
|
|
|
|
thumbnail_image_data = reward_data["thumbnailImage"]
|
|
thumbnail_image, _ = await sync_to_async(Image.objects.get_or_create)(
|
|
image1_x_url=thumbnail_image_data["image1xURL"],
|
|
defaults={"typename": thumbnail_image_data["__typename"]},
|
|
)
|
|
|
|
reward, _ = await sync_to_async(Reward.objects.get_or_create)(
|
|
id=reward_data["id"],
|
|
name=reward_data["name"],
|
|
banner_image=banner_image,
|
|
thumbnail_image=thumbnail_image,
|
|
earnable_until=datetime.fromisoformat(reward_data["earnableUntil"].replace("Z", "+00:00")),
|
|
redemption_instructions=reward_data["redemptionInstructions"],
|
|
redemption_url=reward_data["redemptionURL"],
|
|
typename=reward_data["__typename"],
|
|
)
|
|
rewards.append(reward)
|
|
|
|
# Add or get Unlock Requirements
|
|
unlock_requirements_data = campaign_data["unlockRequirements"]
|
|
_, _ = await sync_to_async(UnlockRequirements.objects.get_or_create)(
|
|
subs_goal=unlock_requirements_data["subsGoal"],
|
|
defaults={
|
|
"minute_watched_goal": unlock_requirements_data["minuteWatchedGoal"],
|
|
"typename": unlock_requirements_data["__typename"],
|
|
},
|
|
)
|
|
|
|
# Create Reward Campaign
|
|
reward_campaign, _ = await sync_to_async(RewardCampaign.objects.get_or_create)(
|
|
id=campaign_data["id"],
|
|
name=campaign_data["name"],
|
|
brand=campaign_data["brand"],
|
|
starts_at=datetime.fromisoformat(campaign_data["startsAt"].replace("Z", "+00:00")),
|
|
ends_at=datetime.fromisoformat(campaign_data["endsAt"].replace("Z", "+00:00")),
|
|
status=campaign_data["status"],
|
|
summary=campaign_data["summary"],
|
|
instructions=campaign_data["instructions"],
|
|
external_url=campaign_data["externalURL"],
|
|
reward_value_url_param=campaign_data["rewardValueURLParam"],
|
|
about_url=campaign_data["aboutURL"],
|
|
is_sitewide=campaign_data["isSitewide"],
|
|
game=game,
|
|
image=image,
|
|
typename=campaign_data["__typename"],
|
|
)
|
|
|
|
# Add Rewards to the Campaign
|
|
for reward in rewards:
|
|
await sync_to_async(reward_campaign.rewards.add)(reward)
|
|
|
|
await sync_to_async(reward_campaign.save)()
|
|
|
|
|
|
class Command(BaseCommand):
|
|
help = "Scrape Twitch Drops Campaigns with login using Firefox"
|
|
|
|
async def run( # noqa: PLR6301, C901
|
|
self,
|
|
playwright: Playwright,
|
|
) -> list[dict[str, typing.Any]]:
|
|
args = []
|
|
|
|
# disable navigator.webdriver:true flag
|
|
args.append("--disable-blink-features=AutomationControlled")
|
|
|
|
profile_dir: Path = Path(data_dir / "chrome-profile")
|
|
profile_dir.mkdir(parents=True, exist_ok=True)
|
|
logger.debug(
|
|
"Launching Chrome browser with user data directory: %s",
|
|
profile_dir,
|
|
)
|
|
|
|
browser: BrowserContext = await playwright.chromium.launch_persistent_context(
|
|
channel="chrome",
|
|
user_data_dir=profile_dir,
|
|
headless=False,
|
|
args=args,
|
|
)
|
|
logger.debug("Launched Chrome browser")
|
|
|
|
page: Page = await browser.new_page()
|
|
json_data: list[dict] = []
|
|
|
|
async def handle_response(response: Response) -> None:
|
|
if "https://gql.twitch.tv/gql" in response.url:
|
|
try:
|
|
body: typing.Any = await response.json()
|
|
json_data.extend(body)
|
|
except Exception:
|
|
logger.exception(
|
|
"Failed to parse JSON from %s",
|
|
response.url,
|
|
)
|
|
|
|
page.on("response", handle_response)
|
|
await page.goto("https://www.twitch.tv/drops/campaigns")
|
|
logger.debug("Navigated to Twitch drops campaigns page")
|
|
|
|
logged_in = False
|
|
while not logged_in:
|
|
try:
|
|
await page.wait_for_selector(
|
|
'div[data-a-target="top-nav-avatar"]',
|
|
timeout=300000,
|
|
)
|
|
logged_in = True
|
|
logger.info("Logged in to Twitch")
|
|
except KeyboardInterrupt as e:
|
|
raise KeyboardInterrupt from e
|
|
except Exception: # noqa: BLE001
|
|
await asyncio.sleep(5)
|
|
logger.info("Waiting for login")
|
|
|
|
await page.wait_for_load_state("networkidle")
|
|
logger.debug("Page loaded. Scraping data...")
|
|
|
|
# Wait 5 seconds for the page to load
|
|
# await asyncio.sleep(5)
|
|
|
|
await browser.close()
|
|
|
|
for num, campaign in enumerate(json_data, start=1):
|
|
logger.info("Processing JSON %d of %d", num, len(json_data))
|
|
if not isinstance(campaign, dict):
|
|
continue
|
|
|
|
if "rewardCampaignsAvailableToUser" in campaign["data"]:
|
|
await add_reward_campaign(campaign)
|
|
|
|
if "dropCampaign" in campaign.get("data", {}).get("user", {}): # noqa: SIM102
|
|
if not campaign["data"]["user"]["dropCampaign"]:
|
|
continue
|
|
|
|
if "dropCampaigns" in campaign.get("data", {}).get("user", {}):
|
|
msg = "Multiple dropCampaigns not supported"
|
|
raise NotImplementedError(msg)
|
|
|
|
return json_data
|
|
|
|
def handle(self, *args, **kwargs) -> None: # noqa: ANN002, ARG002, ANN003
|
|
asyncio.run(self.run_with_playwright())
|
|
|
|
async def run_with_playwright(self) -> None:
|
|
async with async_playwright() as playwright:
|
|
await self.run(playwright)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
Command().handle()
|