WIP better import drops
This commit is contained in:
parent
0400fed26a
commit
69fa30748a
10 changed files with 399 additions and 40 deletions
194
twitch/management/commands/better_import_drops.py
Normal file
194
twitch/management/commands/better_import_drops.py
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from concurrent.futures import as_completed
|
||||
from pathlib import Path
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import CommandError
|
||||
from django.core.management.base import CommandParser
|
||||
from pydantic import ValidationError
|
||||
|
||||
from twitch.models import Channel
|
||||
from twitch.models import DropBenefit
|
||||
from twitch.models import DropCampaign
|
||||
from twitch.models import Game
|
||||
from twitch.models import Organization
|
||||
from twitch.schemas import ViewerDropsDashboardPayload
|
||||
|
||||
|
||||
def move_failed_validation_file(file_path: Path) -> Path:
    """Relocate a file that failed validation into a 'broken' subdirectory.

    The 'broken' directory is created next to the file if it does not
    already exist, and the file is moved into it keeping its name.

    Args:
        file_path: Path to the file that failed validation.

    Returns:
        Path to the 'broken' directory the file was moved into.
    """
    destination_dir: Path = file_path.parent / "broken"
    destination_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): rename() raises on Windows if the target already exists — confirm
    # duplicate file names cannot collide here.
    file_path.rename(destination_dir / file_path.name)
    return destination_dir
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Import Twitch drop campaign data from a JSON file or directory of JSON files."""

    help = "Import Twitch drop campaign data from a JSON file or directory"
    requires_migrations_checks = True

    def __init__(self, *args, **kwargs) -> None:
        """Initialize per-instance caches.

        The twitch_id -> model caches were previously mutable class attributes,
        which would be shared across every Command instance; they are created
        per instance here instead. They are populated by pre_fill_cache().
        """
        super().__init__(*args, **kwargs)
        self.game_cache: dict[str, Game] = {}
        self.organization_cache: dict[str, Organization] = {}
        self.drop_campaign_cache: dict[str, DropCampaign] = {}
        self.channel_cache: dict[str, Channel] = {}
        self.benefit_cache: dict[str, DropBenefit] = {}

    def add_arguments(self, parser: CommandParser) -> None:
        """Populate the command with arguments."""
        parser.add_argument("path", type=str, help="Path to JSON file or directory")
        parser.add_argument("--recursive", action="store_true", help="Recursively search directories for JSON files")
        parser.add_argument("--crash-on-error", action="store_true", help="Crash the command on first error instead of continuing")

    def pre_fill_cache(self) -> None:
        """Load all existing IDs from DB into memory to avoid N+1 queries.

        Each cache maps str(twitch_id) -> model instance; keys are stringified
        so lookups match the string IDs found in the JSON payloads.
        """
        self.stdout.write("Pre-filling caches...")
        self.game_cache = {str(g.twitch_id): g for g in Game.objects.all()}
        self.stdout.write(f"\tGames: {len(self.game_cache)}")

        self.organization_cache = {str(o.twitch_id): o for o in Organization.objects.all()}
        self.stdout.write(f"\tOrganizations: {len(self.organization_cache)}")

        self.drop_campaign_cache = {str(c.twitch_id): c for c in DropCampaign.objects.all()}
        self.stdout.write(f"\tDrop Campaigns: {len(self.drop_campaign_cache)}")

        self.channel_cache = {str(ch.twitch_id): ch for ch in Channel.objects.all()}
        self.stdout.write(f"\tChannels: {len(self.channel_cache)}")

        self.benefit_cache = {str(b.twitch_id): b for b in DropBenefit.objects.all()}
        self.stdout.write(f"\tBenefits: {len(self.benefit_cache)}")

    def handle(self, *args, **options) -> None:  # noqa: ARG002
        """Main entry point for the command.

        Resolves the given path, pre-fills the DB caches, then dispatches to
        single-file or directory processing. Ctrl-C exits with status 130
        (the conventional SIGINT exit code).

        Raises:
            CommandError: If the provided path does not exist.
        """
        input_path: Path = Path(options["path"]).resolve()

        self.pre_fill_cache()

        try:
            if input_path.is_file():
                self.process_file(file_path=input_path, options=options)
            elif input_path.is_dir():
                self.process_json_files(input_path=input_path, options=options)
            else:
                msg: str = f"Path does not exist: {input_path}"
                raise CommandError(msg)
        except KeyboardInterrupt:
            self.stdout.write(self.style.WARNING("\n\nInterrupted by user!"))
            self.stdout.write(self.style.WARNING("Shutting down gracefully..."))
            sys.exit(130)

    def process_json_files(self, input_path: Path, options: dict) -> None:
        """Process multiple JSON files in a directory, validated in parallel.

        Files are fanned out to a ProcessPoolExecutor (process_file_worker must
        stay picklable, hence the staticmethod) and progress is reported as
        each future completes.

        Args:
            input_path: Path to the directory containing JSON files
            options: Command options
        """
        json_files: list[Path] = self.collect_json_files(options, input_path)
        self.stdout.write(f"Found {len(json_files)} JSON files to process")

        completed_count = 0
        with ProcessPoolExecutor() as executor:
            futures = {executor.submit(self.process_file_worker, file_path, options): file_path for file_path in json_files}

            for future in as_completed(futures):
                file_path: Path = futures[future]
                try:
                    result: dict[str, bool | str] = future.result()
                    if result["success"]:
                        self.stdout.write(f"✓ {file_path}")
                    else:
                        self.stdout.write(f"✗ {file_path} -> {result['broken_dir']}/{file_path.name}")

                    completed_count += 1
                except (OSError, ValueError, KeyError) as e:
                    # Worker failures are reported but do not stop the batch;
                    # a ValidationError re-raised under --crash-on-error still
                    # propagates (it is not in this tuple).
                    self.stdout.write(f"✗ {file_path} (error: {e})")
                    completed_count += 1

                self.stdout.write(f"Progress: {completed_count}/{len(json_files)} files processed")
        self.stdout.write("")

    def collect_json_files(self, options: dict, input_path: Path) -> list[Path]:
        """Collect JSON files from the specified directory.

        Args:
            options: Command options
            input_path: Path to the directory

        Returns:
            List of JSON file paths
        """
        if options["recursive"]:
            # Path.rglob replaces the previous os.walk + endswith(".json") loop;
            # is_file() filters out any directory that happens to match the glob.
            return [p for p in input_path.rglob("*.json") if p.is_file()]
        return [f for f in input_path.iterdir() if f.is_file() and f.suffix == ".json"]

    @staticmethod
    def process_file_worker(file_path: Path, options: dict) -> dict[str, bool | str]:
        """Worker function for parallel processing of files.

        Runs in a child process, so it only validates the payload and moves
        broken files; it must not touch the parent's caches or stdout.

        Args:
            file_path: Path to the JSON file to process
            options: Command options

        Raises:
            ValidationError: If the JSON file fails validation

        Returns:
            Dict with success status and optional broken_dir path
        """
        try:
            ViewerDropsDashboardPayload.model_validate_json(file_path.read_text(encoding="utf-8"))
        except ValidationError:
            if options["crash_on_error"]:
                raise

            broken_dir: Path = move_failed_validation_file(file_path)
            return {"success": False, "broken_dir": str(broken_dir)}
        else:
            return {"success": True}

    def process_file(self, file_path: Path, options: dict) -> None:
        """Reads a JSON file and processes the campaign data.

        Single-file path used when the command is given a file rather than a
        directory. On validation failure the file is moved aside unless
        --crash-on-error is set.

        Args:
            file_path: Path to the JSON file
            options: Command options

        Raises:
            ValidationError: If the JSON file fails validation
        """
        self.stdout.write(f"Processing file: {file_path}")

        try:
            _: ViewerDropsDashboardPayload = ViewerDropsDashboardPayload.model_validate_json(file_path.read_text(encoding="utf-8"))
            self.stdout.write("\tProcessed drop campaigns")
        except ValidationError:
            if options["crash_on_error"]:
                raise

            broken_dir: Path = move_failed_validation_file(file_path)
            self.stdout.write(f"\tMoved to {broken_dir} (validation failed)")
|
||||
|
|
@ -187,15 +187,15 @@ class Command(BaseCommand):
|
|||
"""Load existing DB objects into in-memory caches to avoid repeated queries."""
|
||||
# These queries may be heavy if DB is huge — safe because optional via --no-preload
|
||||
with self._cache_locks["game"]:
|
||||
self._game_cache = {str(g.id): g for g in Game.objects.all()}
|
||||
self._game_cache = {str(g.twitch_id): g for g in Game.objects.all()}
|
||||
with self._cache_locks["org"]:
|
||||
self._organization_cache = {str(o.id): o for o in Organization.objects.all()}
|
||||
self._organization_cache = {str(o.twitch_id): o for o in Organization.objects.all()}
|
||||
with self._cache_locks["campaign"]:
|
||||
self._drop_campaign_cache = {str(c.id): c for c in DropCampaign.objects.all()}
|
||||
self._drop_campaign_cache = {str(c.twitch_id): c for c in DropCampaign.objects.all()}
|
||||
with self._cache_locks["channel"]:
|
||||
self._channel_cache = {str(ch.id): ch for ch in Channel.objects.all()}
|
||||
self._channel_cache = {str(ch.twitch_id): ch for ch in Channel.objects.all()}
|
||||
with self._cache_locks["benefit"]:
|
||||
self._benefit_cache = {str(b.id): b for b in DropBenefit.objects.all()}
|
||||
self._benefit_cache = {str(b.twitch_id): b for b in DropBenefit.objects.all()}
|
||||
|
||||
def process_drops(self, *, continue_on_error: bool, path: Path, processed_path: Path) -> None:
|
||||
"""Process drops from a file or directory.
|
||||
|
|
@ -397,8 +397,8 @@ class Command(BaseCommand):
|
|||
return
|
||||
|
||||
if isinstance(data, list):
|
||||
for _item in data:
|
||||
self.import_drop_campaign(_item, file_path=file_path)
|
||||
for item in data:
|
||||
self.import_drop_campaign(item, file_path=file_path)
|
||||
elif isinstance(data, dict):
|
||||
self.import_drop_campaign(data, file_path=file_path)
|
||||
else:
|
||||
|
|
@ -534,7 +534,7 @@ class Command(BaseCommand):
|
|||
|
||||
benefit_edges: list[dict[str, Any]] = drop_data.get("benefitEdges", [])
|
||||
if not benefit_edges:
|
||||
tqdm.write(self.style.WARNING(f"No benefit edges found for drop {time_based_drop.name} (ID: {time_based_drop.id})"))
|
||||
tqdm.write(self.style.WARNING(f"No benefit edges found for drop {time_based_drop.name} (ID: {time_based_drop.twitch_id})"))
|
||||
self.move_file(file_path, Path("no_benefit_edges") / file_path.name)
|
||||
return
|
||||
|
||||
|
|
@ -570,10 +570,10 @@ class Command(BaseCommand):
|
|||
if created:
|
||||
tqdm.write(f"Added {drop_benefit_edge}")
|
||||
except MultipleObjectsReturned as e:
|
||||
msg = f"Error: Multiple DropBenefitEdge objects found for drop {time_based_drop.id} and benefit {benefit.id}. Cannot update or create."
|
||||
msg = f"Error: Multiple DropBenefitEdge objects found for drop {time_based_drop.twitch_id} and benefit {benefit.twitch_id}. Cannot update or create." # noqa: E501
|
||||
raise CommandError(msg) from e
|
||||
except (IntegrityError, DatabaseError, TypeError, ValueError) as e:
|
||||
msg = f"Database or validation error creating DropBenefitEdge for drop {time_based_drop.id} and benefit {benefit.id}: {e}"
|
||||
msg = f"Database or validation error creating DropBenefitEdge for drop {time_based_drop.twitch_id} and benefit {benefit.twitch_id}: {e}"
|
||||
raise CommandError(msg) from e
|
||||
|
||||
def create_time_based_drop(self, drop_campaign: DropCampaign, drop_data: dict[str, Any]) -> TimeBasedDrop:
|
||||
|
|
@ -847,7 +847,7 @@ class Command(BaseCommand):
|
|||
|
||||
# Set the many-to-many relationship (save only if different)
|
||||
current_ids = set(drop_campaign.allow_channels.values_list("id", flat=True))
|
||||
new_ids = {ch.id for ch in channel_objects}
|
||||
new_ids = {ch.twitch_id for ch in channel_objects}
|
||||
if current_ids != new_ids:
|
||||
drop_campaign.allow_channels.set(channel_objects)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue