Refactor Twitch scraper

This commit is contained in:
2024-06-28 05:25:58 +02:00
parent 23bf41aaf0
commit c266f32275

View File

@ -40,7 +40,7 @@ if not data_dir:
logger: logging.Logger = logging.getLogger("twitch.management.commands.scrape_twitch") logger: logging.Logger = logging.getLogger("twitch.management.commands.scrape_twitch")
async def insert_data(data: dict) -> None: # noqa: PLR0914 async def insert_data(data: dict) -> None: # noqa: PLR0914, C901, PLR0912
"""Insert data into the database. """Insert data into the database.
Args: Args:
@ -54,19 +54,16 @@ async def insert_data(data: dict) -> None: # noqa: PLR0914
user_id = user_data["id"] user_id = user_data["id"]
drop_campaign_data = user_data["dropCampaign"] drop_campaign_data = user_data["dropCampaign"]
if not drop_campaign_data: if not drop_campaign_data:
logger.debug("No drop campaign data found")
return return
logger.info("Inserting data for user ID: %s", user_id)
# Create or get the organization # Create or get the organization
owner_data = drop_campaign_data["owner"] owner_data = drop_campaign_data["owner"]
owner, created = await sync_to_async(Organization.objects.get_or_create)( owner, created = await sync_to_async(Organization.objects.get_or_create)(
id=owner_data["id"], id=owner_data["id"],
defaults={"name": owner_data["name"]}, defaults={"name": owner_data["name"]},
) )
if created:
logger.debug("Organization %s: %s", "created" if created else "retrieved", owner) logger.debug("Organization created: %s", owner)
# Create or get the game # Create or get the game
game_data = drop_campaign_data["game"] game_data = drop_campaign_data["game"]
@ -77,7 +74,8 @@ async def insert_data(data: dict) -> None: # noqa: PLR0914
"display_name": game_data["displayName"], "display_name": game_data["displayName"],
}, },
) )
logger.debug("Game %s: %s", "created" if created else "retrieved", game) if created:
logger.debug("Game created: %s", game)
# Create the drop campaign # Create the drop campaign
drop_campaign, created = await sync_to_async(DropCampaign.objects.get_or_create)( drop_campaign, created = await sync_to_async(DropCampaign.objects.get_or_create)(
@ -95,21 +93,11 @@ async def insert_data(data: dict) -> None: # noqa: PLR0914
"owner": owner, "owner": owner,
}, },
) )
logger.debug( if created:
"Drop campaign %s: %s", logger.debug("Drop campaign created: %s", drop_campaign)
"created" if created else "retrieved",
drop_campaign,
)
if not drop_campaign_data["allow"]:
logger.debug("No allowed data in drop campaign")
return
if not drop_campaign_data["allow"]["channels"]:
logger.debug("No allowed channels in drop campaign")
return
# Create channels # Create channels
if drop_campaign_data["allow"] and drop_campaign_data["allow"]["channels"]:
for channel_data in drop_campaign_data["allow"]["channels"]: for channel_data in drop_campaign_data["allow"]["channels"]:
channel, created = await sync_to_async(Channel.objects.get_or_create)( channel, created = await sync_to_async(Channel.objects.get_or_create)(
id=channel_data["id"], id=channel_data["id"],
@ -119,7 +107,8 @@ async def insert_data(data: dict) -> None: # noqa: PLR0914
}, },
) )
await sync_to_async(drop_campaign.channels.add)(channel) await sync_to_async(drop_campaign.channels.add)(channel)
logger.debug("Channel %s: %s", "created" if created else "retrieved", channel) if created:
logger.debug("Channel created: %s", channel)
# Create time-based drops # Create time-based drops
for drop_data in drop_campaign_data["timeBasedDrops"]: for drop_data in drop_campaign_data["timeBasedDrops"]:
@ -136,21 +125,16 @@ async def insert_data(data: dict) -> None: # noqa: PLR0914
id=benefit_owner_data["id"], id=benefit_owner_data["id"],
defaults={"name": benefit_owner_data["name"]}, defaults={"name": benefit_owner_data["name"]},
) )
logger.debug( if created:
"Benefit owner %s: %s", logger.debug("Benefit owner created: %s", benefit_owner)
"created" if created else "retrieved",
benefit_owner,
)
benefit_game_data = benefit_data["game"] benefit_game_data = benefit_data["game"]
benefit_game, created = await sync_to_async(Game.objects.get_or_create)( benefit_game, created = await sync_to_async(Game.objects.get_or_create)(
id=benefit_game_data["id"], id=benefit_game_data["id"],
defaults={"name": benefit_game_data["name"]}, defaults={"name": benefit_game_data["name"]},
) )
logger.debug( if created:
"Benefit game %s: %s", logger.debug("Benefit game created: %s", benefit_game)
"created" if created else "retrieved",
benefit_game,
)
benefit, created = await sync_to_async(DropBenefit.objects.get_or_create)( benefit, created = await sync_to_async(DropBenefit.objects.get_or_create)(
id=benefit_data["id"], id=benefit_data["id"],
@ -165,11 +149,8 @@ async def insert_data(data: dict) -> None: # noqa: PLR0914
}, },
) )
drop_benefits.append(benefit) drop_benefits.append(benefit)
logger.debug( if created:
"Benefit %s: %s", logger.debug("Benefit created: %s", benefit)
"created" if created else "retrieved",
benefit,
)
time_based_drop, created = await sync_to_async( time_based_drop, created = await sync_to_async(
TimeBasedDrop.objects.get_or_create, TimeBasedDrop.objects.get_or_create,
@ -185,20 +166,15 @@ async def insert_data(data: dict) -> None: # noqa: PLR0914
) )
await sync_to_async(time_based_drop.benefits.set)(drop_benefits) await sync_to_async(time_based_drop.benefits.set)(drop_benefits)
await sync_to_async(drop_campaign.time_based_drops.add)(time_based_drop) await sync_to_async(drop_campaign.time_based_drops.add)(time_based_drop)
logger.debug(
"Time-based drop %s: %s", if created:
"created" if created else "retrieved", logger.debug("Time-based drop created: %s", time_based_drop)
time_based_drop,
)
# Create or get the user # Create or get the user
user, created = await sync_to_async(User.objects.get_or_create)(id=user_id) user, created = await sync_to_async(User.objects.get_or_create)(id=user_id)
await sync_to_async(user.drop_campaigns.add)(drop_campaign) await sync_to_async(user.drop_campaigns.add)(drop_campaign)
logger.debug( if created:
"User %s: %s", logger.debug("User created: %s", user)
"created" if created else "retrieved",
user,
)
class Command(BaseCommand): class Command(BaseCommand):
@ -229,10 +205,6 @@ class Command(BaseCommand):
try: try:
body: typing.Any = await response.json() body: typing.Any = await response.json()
json_data.extend(body) json_data.extend(body)
logger.debug(
"Received JSON data from %s",
response.url,
)
except Exception: except Exception:
logger.exception( logger.exception(
"Failed to parse JSON from %s", "Failed to parse JSON from %s",
@ -259,11 +231,12 @@ class Command(BaseCommand):
logger.info("Waiting for login") logger.info("Waiting for login")
await page.wait_for_load_state("networkidle") await page.wait_for_load_state("networkidle")
logger.debug("Page is idle") logger.debug("Page loaded. Scraping data...")
await browser.close() await browser.close()
for campaign in json_data: for num, campaign in enumerate(json_data, start=1):
logger.info("Processing JSON %d of %d", num, len(json_data))
if not isinstance(campaign, dict): if not isinstance(campaign, dict):
continue continue