Improve import command

This commit is contained in:
Joakim Hellsén 2026-01-05 18:46:46 +01:00
commit 1d6c52325c
No known key found for this signature in database
30 changed files with 2628 additions and 554 deletions

View file

@ -1,11 +1,13 @@
from __future__ import annotations
import json
import os
import sys
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
from datetime import UTC
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import Literal
from colorama import Fore
from colorama import Style
@ -13,28 +15,101 @@ from colorama import init as colorama_init
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.core.management.base import CommandParser
from django.db import DatabaseError
from pydantic import ValidationError
from tqdm import tqdm
from twitch.models import Channel
from twitch.models import DropBenefit
from twitch.models import DropBenefitEdge
from twitch.models import DropCampaign
from twitch.models import Game
from twitch.models import Organization
from twitch.schemas import ViewerDropsDashboardPayload
from twitch.models import TimeBasedDrop
from twitch.schemas import DropBenefitEdgeSchema
from twitch.schemas import DropBenefitSchema
from twitch.schemas import GameSchema
from twitch.schemas import GraphQLResponse
from twitch.schemas import OrganizationSchema
from twitch.schemas import TimeBasedDropSchema
from twitch.utils import parse_date
def move_failed_validation_file(file_path: Path) -> Path:
def get_broken_directory_root() -> Path:
    """Return the root directory used to quarantine broken files.

    The location comes from the ``TTVDROPS_BROKEN_DIR`` environment
    variable when it is set and non-empty; otherwise it falls back to
    ``~/ttvdrops/broken``.

    Returns:
        Path to the root broken directory.
    """
    configured: str | None = os.environ.get("TTVDROPS_BROKEN_DIR")
    if configured:
        return Path(configured)
    # Fall back to a per-user default under the home directory.
    return Path.home() / "ttvdrops" / "broken"
def get_imported_directory_root() -> Path:
    """Return the root directory for successfully imported files.

    The location comes from the ``TTVDROPS_IMPORTED_DIR`` environment
    variable when it is set and non-empty; otherwise it falls back to
    ``~/ttvdrops/imported``.

    Returns:
        Path to the root imported directory.
    """
    configured: str | None = os.environ.get("TTVDROPS_IMPORTED_DIR")
    if configured:
        return Path(configured)
    # Fall back to a per-user default under the home directory.
    return Path.home() / "ttvdrops" / "imported"
def _build_broken_directory(
    reason: str,
    operation_name: str | None = None,
) -> Path:
    """Build (and create) a deeply nested broken directory for triage.

    Directory pattern: <broken_root>/<reason>/<operation>/<YYYY>/<MM>/<DD>.
    This keeps unrelated failures isolated and easy to browse later.

    Args:
        reason: High-level reason bucket (e.g., validation_failed).
        operation_name: Optional operationName extracted from the payload.

    Returns:
        Path to the directory where the file should live.
    """
    # Normalize both segments so each is a single safe path component.
    reason_segment: str = reason.replace(" ", "_")
    operation_segment: str = (operation_name or "unknown_op").replace(" ", "_")
    stamp: datetime = datetime.now(tz=UTC)
    target: Path = (
        get_broken_directory_root()
        / reason_segment
        / operation_segment
        / f"{stamp:%Y}"
        / f"{stamp:%m}"
        / f"{stamp:%d}"
    )
    target.mkdir(parents=True, exist_ok=True)
    return target
def move_failed_validation_file(file_path: Path, operation_name: str | None = None) -> Path:
    """Moves a file that failed validation to a 'broken' subdirectory.

    The destination is the dated triage tree built by
    ``_build_broken_directory`` under the ``validation_failed`` bucket.

    Args:
        file_path: Path to the file that failed validation.
        operation_name: Optional GraphQL operation name for finer grouping.

    Returns:
        Path to the 'broken' directory where the file was moved.
    """
    # The old implementation also created a stray "<parent>/broken"
    # directory that was never used; only the triage tree is built now.
    broken_dir: Path = _build_broken_directory(
        reason="validation_failed",
        operation_name=operation_name,
    )
    target_file: Path = broken_dir / file_path.name
    file_path.rename(target_file)
    return broken_dir
def move_file_to_broken_subdir(
    file_path: Path,
    subdir: str,
    operation_name: str | None = None,
) -> Path:
    """Move file to broken/<subdir> and return that directory path.

    Args:
        file_path: The file to move.
        subdir: Subdirectory name under "broken" (e.g., the matched keyword).
        operation_name: Optional GraphQL operation name for finer grouping.

    Returns:
        Path to the directory where the file was moved.
    """
    # The old implementation created an unused "~/broken/<subdir>"
    # directory; the dated triage tree is the single destination now.
    broken_dir: Path = _build_broken_directory(
        reason=subdir,
        operation_name=operation_name,
    )
    target_file: Path = broken_dir / file_path.name
    file_path.rename(target_file)
    return broken_dir
def move_completed_file(file_path: Path, operation_name: str | None = None) -> Path:
    """Move a successfully processed file into an operation-named directory.

    Moves to <imported_root>/<operation_name>/.

    Args:
        file_path: Path to the processed JSON file.
        operation_name: GraphQL operationName extracted from the payload.

    Returns:
        Path to the directory where the file was moved.
    """
    # Collapse spaces and path separators so the name is one safe segment.
    raw_name: str = operation_name or "unknown_op"
    safe_op: str = raw_name.replace(" ", "_").replace("/", "_").replace("\\", "_")
    destination: Path = get_imported_directory_root() / safe_op
    destination.mkdir(parents=True, exist_ok=True)
    file_path.rename(destination / file_path.name)
    return destination
# Pre-compute keyword search patterns for faster detection.
# Maps each known non-campaign operationName to the exact substring
# that identifies it in the raw JSON text.
_KNOWN_NON_CAMPAIGN_PATTERNS: dict[str, str] = {
    keyword: f'"operationName": "{keyword}"'
    for keyword in [
        "ChannelPointsContext",
        "ClaimCommunityPoints",
        "DirectoryPage_Game",
        "DropCurrentSessionContext",
        "DropsPage_ClaimDropRewards",
        "OnsiteNotifications_DeleteNotification",
        "PlaybackAccessToken",
        "streamPlaybackAccessToken",
        "VideoPlayerStreamInfoOverlayChannel",
    ]
}


def detect_non_campaign_keyword(raw_text: str) -> str | None:
    """Detect if payload is a known non-drop-campaign response.

    Cheap substring scan against the pre-computed patterns so obviously
    irrelevant payloads can be bucketed before any JSON parsing.
    (The removed inline keyword list duplicated the module constant.)

    Args:
        raw_text: Raw JSON text of the payload.

    Returns:
        The matched keyword, or None if no match found.
    """
    for keyword, pattern in _KNOWN_NON_CAMPAIGN_PATTERNS.items():
        if pattern in raw_text:
            return keyword
    return None
def extract_operation_name_from_parsed(
    payload: dict[str, Any] | list[Any],
) -> str | None:
    """Extract GraphQL operationName from an already parsed JSON payload.

    This is safer than substring scanning. The expected location is
    ``payload["extensions"]["operationName"]``; any missing or
    unexpectedly typed key simply yields None.

    Args:
        payload: Parsed JSON object or list.

    Returns:
        The operation name if found, otherwise None.
    """
    # Provenance extraction is best-effort and must never break the import.
    if isinstance(payload, dict):
        extensions = payload.get("extensions")
        if isinstance(extensions, dict):
            candidate = extensions.get("operationName")
            if isinstance(candidate, str):
                return candidate
    return None
class Command(BaseCommand):
"""Import Twitch drop campaign data from a JSON file or directory of JSON files."""
"""Import Twitch drop campaign data from a JSON file or directory."""
help = "Import Twitch drop campaign data from a JSON file or directory"
requires_migrations_checks = True
# In-memory caches prevent repeated DB lookups during batch imports,
# cutting query volume and keeping runtime predictable.
game_cache: dict[str, Game] = {}
organization_cache: dict[str, Organization] = {}
drop_campaign_cache: dict[str, DropCampaign] = {}
@ -105,13 +242,45 @@ class Command(BaseCommand):
def add_arguments(self, parser: CommandParser) -> None:
    """Populate the command with arguments.

    The diff residue registered `path`, `--recursive`, and `--verbose`
    twice (old one-line calls plus their new multi-line versions), which
    argparse rejects; only the new definitions remain.
    """
    parser.add_argument(
        "path",
        type=str,
        help="Path to JSON file or directory",
    )
    parser.add_argument(
        "--recursive",
        action="store_true",
        help="Recursively search directories for JSON files",
    )
    parser.add_argument(
        "--crash-on-error",
        dest="crash_on_error",
        action="store_true",
        help="Crash the command on first error instead of continuing",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Print per-file success messages",
    )
    parser.add_argument(
        "--skip-broken-moves",
        dest="skip_broken_moves",
        action="store_true",
        help=(
            "Do not move files to the broken directory on failures; useful"
            " during testing to avoid unnecessary file moves"
        ),
    )
def pre_fill_cache(self) -> None:
"""Load all existing IDs from DB into memory to avoid N+1 queries."""
"""Load all existing IDs from DB into memory."""
self.game_cache = {}
self.organization_cache = {}
self.drop_campaign_cache = {}
self.channel_cache = {}
self.benefit_cache = {}
cache_operations: list[tuple[str, type, str]] = [
("Games", Game, "game_cache"),
("Organizations", Organization, "organization_cache"),
@ -120,14 +289,386 @@ class Command(BaseCommand):
("Benefits", DropBenefit, "benefit_cache"),
]
with tqdm(cache_operations, desc="Loading caches", unit="cache", colour="cyan") as progress_bar:
for name, model, cache_attr in progress_bar:
progress_bar.set_description(f"Loading {name}")
cache: dict[str, Any] = {str(obj.twitch_id): obj for obj in model.objects.all()}
setattr(self, cache_attr, cache)
progress_bar.write(f" {Fore.GREEN}{Style.RESET_ALL} {name}: {len(cache):,}")
try:
with tqdm(cache_operations, desc="Loading caches", unit="cache", colour="cyan") as progress_bar:
for name, model, cache_attr in progress_bar:
self.load_cache_for_model(progress_bar, name, model, cache_attr)
tqdm.write("")
except (DatabaseError, OSError, RuntimeError, ValueError, TypeError):
# If cache loading fails completely, just use empty caches
tqdm.write(f"{Fore.YELLOW}{Style.RESET_ALL} Cache preload skipped (database error)\n")
tqdm.write("")
def load_cache_for_model(self, progress_bar: tqdm, name: str, model: type, cache_attr: str) -> None:
    """Load cache for a specific model and attach to the command instance.

    Args:
        progress_bar: TQDM progress bar instance.
        name: Human-readable name of the model being cached.
        model: Django model class to query.
        cache_attr: Attribute name on the command instance to store the cache.
    """
    progress_bar.set_description(f"Loading {name}")
    try:
        # Key by stringified twitch_id so lookups match parsed JSON values.
        loaded: dict[str, Any] = {str(row.twitch_id): row for row in model.objects.all()}
        setattr(self, cache_attr, loaded)
        progress_bar.write(f" {Fore.GREEN}{Style.RESET_ALL} {name}: {len(loaded):,}")
    except (DatabaseError, OSError, RuntimeError, ValueError, TypeError) as e:
        # Database error - skip this cache and fall back to an empty dict.
        warning: str = f" {Fore.YELLOW}{Style.RESET_ALL} {name}: Could not load ({type(e).__name__})"
        progress_bar.write(warning)
        setattr(self, cache_attr, {})
def _validate_campaigns(
    self,
    campaigns_found: list[dict[str, Any]],
    file_path: Path,
    options: dict[str, Any],
) -> list[GraphQLResponse]:
    """Validate campaign data using Pydantic schema.

    Only responses that validate AND carry a current_user with at least
    one drop campaign are kept; everything else is logged and optionally
    moved to the broken triage tree.

    Args:
        campaigns_found: List of raw campaign dictionaries.
        file_path: Path to the file being processed.
        options: Command options.

    Returns:
        List of validated Pydantic GraphQLResponse models.

    Raises:
        ValidationError: If campaign data fails Pydantic validation
            and crash-on-error is enabled.
    """
    valid_campaigns: list[GraphQLResponse] = []
    # Defensive: callers should pass a list, but guard anyway.
    if isinstance(campaigns_found, list):
        for campaign in campaigns_found:
            if isinstance(campaign, dict):
                try:
                    response: GraphQLResponse = GraphQLResponse.model_validate(campaign)
                    # Keep only payloads that actually contain campaigns.
                    if response.data.current_user and response.data.current_user.drop_campaigns:
                        valid_campaigns.append(response)
                except ValidationError as e:
                    tqdm.write(
                        f"{Fore.RED}{Style.RESET_ALL} Validation failed for an entry in {file_path.name}: {e}",
                    )
                    # Move invalid inputs out of the hot path so future runs can progress.
                    if not options.get("skip_broken_moves"):
                        op_name: str | None = extract_operation_name_from_parsed(campaign)
                        move_failed_validation_file(file_path, operation_name=op_name)
                    # Optionally crash early to surface schema issues.
                    if options.get("crash_on_error"):
                        raise
                    continue
    return valid_campaigns
def _get_or_create_organization(
    self,
    org_data: OrganizationSchema,
) -> Organization:
    """Get or create an organization from cache or database.

    Args:
        org_data: Organization data from Pydantic model.

    Returns:
        Organization instance.
    """
    # Prefer cache hits to avoid hitting the DB on every campaign item.
    cached: Organization | None = self.organization_cache.get(org_data.twitch_id)
    if cached is not None:
        return cached
    org_obj, created = Organization.objects.update_or_create(
        twitch_id=org_data.twitch_id,
        defaults={
            "name": org_data.name,
        },
    )
    if created:
        tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new organization: {org_data.name}")
    # Remember the result so subsequent lookups stay in memory.
    self.organization_cache[org_data.twitch_id] = org_obj
    return org_obj
def _get_or_create_game(
    self,
    game_data: GameSchema,
    org_obj: Organization,
) -> Game:
    """Get or create a game from cache or database.

    Args:
        game_data: Game data from Pydantic model.
        org_obj: Organization that owns this game.

    Returns:
        Game instance.
    """
    cached_game: Game | None = self.game_cache.get(game_data.twitch_id)
    if cached_game is not None:
        # Repair stale owner linkage left by earlier partial imports
        # (games are sometimes first imported without owner data).
        # Comparing owner_id avoids triggering an extra query.
        if cached_game.owner_id != org_obj.pk:  # type: ignore[attr-defined] # Django adds _id suffix for FK fields
            cached_game.owner = org_obj
            cached_game.save(update_fields=["owner"])
        return cached_game
    game_obj, created = Game.objects.update_or_create(
        twitch_id=game_data.twitch_id,
        defaults={
            "display_name": game_data.display_name,
            "box_art": game_data.box_art_url,
            "owner": org_obj,
        },
    )
    if created:
        tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new game: {game_data.display_name}")
    self.game_cache[game_data.twitch_id] = game_obj
    return game_obj
def _should_skip_campaign_update(
    self,
    cached_obj: DropCampaign,
    defaults: dict[str, Any],
    game_obj: Game,
) -> bool:
    """Check if campaign update can be skipped based on cache comparison.

    Pure in-memory check: FK ids are compared directly (game_id is
    Django's auto-generated FK attribute) so no query is triggered.
    Short-circuiting updates when nothing changed reduces write load
    and log noise while keeping caches accurate.

    Args:
        cached_obj: Cached campaign object.
        defaults: New campaign data.
        game_obj: Associated game object.

    Returns:
        True if no update needed, False otherwise.
    """
    comparisons: tuple[bool, ...] = (
        cached_obj.name == defaults["name"],
        cached_obj.start_at == defaults["start_at"],
        cached_obj.end_at == defaults["end_at"],
        cached_obj.details_url == defaults["details_url"],
        cached_obj.account_link_url == defaults["account_link_url"],
        getattr(cached_obj, "game_id", None) == game_obj.pk,
        cached_obj.is_account_connected == defaults["is_account_connected"],
    )
    return all(comparisons)
def process_campaigns(
    self,
    campaigns_found: list[dict[str, Any]],
    file_path: Path,
    options: dict[str, Any],
) -> None:
    """Process, validate, and import campaign data.

    With dependency resolution and caching: each campaign's organization
    and game are resolved (from cache or DB) before the campaign row is
    written, and all three caches are kept up to date.

    Args:
        campaigns_found: List of raw campaign dictionaries to process.
        file_path: Path to the file being processed.
        options: Command options dictionary.

    Raises:
        ValueError: If datetime parsing fails for campaign dates and
            crash-on-error is enabled.
    """
    valid_campaigns: list[GraphQLResponse] = self._validate_campaigns(
        campaigns_found=campaigns_found,
        file_path=file_path,
        options=options,
    )
    for response in valid_campaigns:
        # _validate_campaigns already filtered these, but guard anyway.
        if not response.data.current_user:
            continue
        for drop_campaign in response.data.current_user.drop_campaigns:
            # Resolve dependencies first: organization, then its game.
            org_obj: Organization = self._get_or_create_organization(
                org_data=drop_campaign.owner,
            )
            game_obj: Game = self._get_or_create_game(
                game_data=drop_campaign.game,
                org_obj=org_obj,
            )
            start_at_dt: datetime | None = parse_date(drop_campaign.start_at)
            end_at_dt: datetime | None = parse_date(drop_campaign.end_at)
            if start_at_dt is None or end_at_dt is None:
                tqdm.write(f"{Fore.RED}{Style.RESET_ALL} Invalid datetime in campaign: {drop_campaign.name}")
                if options.get("crash_on_error"):
                    msg: str = f"Failed to parse datetime for campaign {drop_campaign.name}"
                    raise ValueError(msg)
                continue
            defaults: dict[str, str | datetime | Game | bool] = {
                "name": drop_campaign.name,
                "game": game_obj,
                "start_at": start_at_dt,
                "end_at": end_at_dt,
                "details_url": drop_campaign.details_url,
                "account_link_url": drop_campaign.account_link_url,
                # NOTE(review): ".self" is presumably the schema field for
                # Twitch's GraphQL "self" object (viewer state), not the
                # Python self — confirm against twitch.schemas.
                "is_account_connected": (drop_campaign.self.is_account_connected),
            }
            if drop_campaign.twitch_id in self.drop_campaign_cache:
                cached_obj: DropCampaign = self.drop_campaign_cache[drop_campaign.twitch_id]
                # Skip the write entirely when nothing changed.
                if self._should_skip_campaign_update(cached_obj=cached_obj, defaults=defaults, game_obj=game_obj):
                    if options.get("verbose"):
                        tqdm.write(f"{Fore.YELLOW}{Style.RESET_ALL} Skipped (No changes): {drop_campaign.name}")
                    continue
            campaign_obj, created = DropCampaign.objects.update_or_create(
                twitch_id=drop_campaign.twitch_id,
                defaults=defaults,
            )
            if created:
                tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new campaign: {drop_campaign.name}")
            self.drop_campaign_cache[drop_campaign.twitch_id] = campaign_obj
            # NOTE(review): newly created campaigns emit both the
            # "Created" and "Imported new" lines — confirm intentional.
            action: Literal["Imported new", "Updated"] = "Imported new" if created else "Updated"
            tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} {action} campaign: {drop_campaign.name}")
            # Record provenance (operationName) only when it changed.
            if (
                response.extensions
                and response.extensions.operation_name
                and campaign_obj.operation_name != response.extensions.operation_name
            ):
                campaign_obj.operation_name = response.extensions.operation_name
                campaign_obj.save(update_fields=["operation_name"])
            if drop_campaign.time_based_drops:
                self._process_time_based_drops(
                    time_based_drops_schema=drop_campaign.time_based_drops,
                    campaign_obj=campaign_obj,
                )
def _process_time_based_drops(
    self,
    time_based_drops_schema: list[TimeBasedDropSchema],
    campaign_obj: DropCampaign,
) -> None:
    """Process time-based drops for a campaign.

    Args:
        time_based_drops_schema: List of TimeBasedDrop Pydantic schemas.
        campaign_obj: The DropCampaign database object.
    """
    for schema in time_based_drops_schema:
        parsed_start: datetime | None = parse_date(schema.start_at)
        parsed_end: datetime | None = parse_date(schema.end_at)
        # Optional fields are only included when they carry a value so
        # update_or_create never overwrites existing data with nothing.
        defaults: dict[str, str | int | datetime | DropCampaign] = {
            "campaign": campaign_obj,
            "name": schema.name,
            "required_subs": schema.required_subs,
        }
        if schema.required_minutes_watched is not None:
            defaults["required_minutes_watched"] = schema.required_minutes_watched
        if parsed_start is not None:
            defaults["start_at"] = parsed_start
        if parsed_end is not None:
            defaults["end_at"] = parsed_end
        drop_obj, created = TimeBasedDrop.objects.update_or_create(
            twitch_id=schema.twitch_id,
            defaults=defaults,
        )
        if created:
            tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created TimeBasedDrop: {schema.name}")
        self._process_benefit_edges(
            benefit_edges_schema=schema.benefit_edges,
            drop_obj=drop_obj,
        )
def _get_or_update_benefit(self, benefit_schema: DropBenefitSchema) -> DropBenefit:
    """Return a DropBenefit, updating stale cached values when needed.

    Cache hits are compared field-by-field against the incoming schema
    and saved only when something actually changed; cache misses fall
    back to update_or_create. The cache is refreshed either way.

    Args:
        benefit_schema: Benefit data from the Pydantic model.

    Returns:
        The up-to-date DropBenefit instance.
    """
    benefit_defaults: dict[str, str | int | datetime | bool | None] = {
        "name": benefit_schema.name,
        "image_asset_url": benefit_schema.image_asset_url,
        "entitlement_limit": benefit_schema.entitlement_limit,
        "is_ios_available": benefit_schema.is_ios_available,
        "distribution_type": benefit_schema.distribution_type,
    }
    # created_at is only set when it parses cleanly; a bad timestamp
    # never clobbers an existing value.
    if benefit_schema.created_at:
        created_at_dt: datetime | None = parse_date(benefit_schema.created_at)
        if created_at_dt:
            benefit_defaults["created_at"] = created_at_dt
    cached_benefit: DropBenefit | None = self.benefit_cache.get(benefit_schema.twitch_id)
    if cached_benefit:
        # Apply only the fields that differ, then save just those.
        update_fields: list[str] = []
        for field_name, value in benefit_defaults.items():
            if getattr(cached_benefit, field_name) != value:
                setattr(cached_benefit, field_name, value)
                update_fields.append(field_name)
        if update_fields:
            cached_benefit.save(update_fields=update_fields)
        benefit_obj: DropBenefit = cached_benefit
    else:
        benefit_obj, created = DropBenefit.objects.update_or_create(
            twitch_id=benefit_schema.twitch_id,
            defaults=benefit_defaults,
        )
        if created:
            tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created DropBenefit: {benefit_schema.name}")
    self.benefit_cache[benefit_schema.twitch_id] = benefit_obj
    return benefit_obj
def _process_benefit_edges(
    self,
    benefit_edges_schema: list[DropBenefitEdgeSchema],
    drop_obj: TimeBasedDrop,
) -> None:
    """Process benefit edges for a time-based drop.

    Args:
        benefit_edges_schema: List of DropBenefitEdge Pydantic schemas.
        drop_obj: The TimeBasedDrop database object.
    """
    for edge in benefit_edges_schema:
        benefit_schema: DropBenefitSchema = edge.benefit
        # Resolve the benefit first so the edge always links a fresh row.
        benefit_obj: DropBenefit = self._get_or_update_benefit(benefit_schema=benefit_schema)
        _edge_obj, made_new = DropBenefitEdge.objects.update_or_create(
            drop=drop_obj,
            benefit=benefit_obj,
            defaults={"entitlement_limit": edge.entitlement_limit},
        )
        if made_new:
            tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Linked benefit: {benefit_schema.name}{drop_obj.name}")
def handle(self, *args, **options) -> None: # noqa: ARG002
"""Main entry point for the command.
@ -152,7 +693,7 @@ class Command(BaseCommand):
except KeyboardInterrupt:
tqdm.write(self.style.WARNING("\n\nInterrupted by user!"))
tqdm.write(self.style.WARNING("Shutting down gracefully..."))
sys.exit(130)
sys.exit(130) # 128 + 2 (Keyboard Interrupt)
def process_json_files(self, input_path: Path, options: dict) -> None:
"""Process multiple JSON files in a directory.
@ -168,37 +709,39 @@ class Command(BaseCommand):
failed_count = 0
error_count = 0
with (
ProcessPoolExecutor() as executor,
tqdm(
total=len(json_files),
desc="Processing",
unit="file",
bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]",
colour="green",
dynamic_ncols=True,
) as progress_bar,
):
# Choose a reasonable chunk_size to reduce overhead for huge file counts
cpu_count = os.cpu_count() or 1
chunk_size = max(1, min(1000, len(json_files) // (cpu_count * 8 or 1)))
results_iter = executor.map(self.process_file_worker, json_files, repeat(options), chunksize=chunk_size)
with tqdm(
total=len(json_files),
desc="Processing",
unit="file",
bar_format=("{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]"),
colour="green",
dynamic_ncols=True,
) as progress_bar:
for file_path in json_files:
try:
result: dict[str, bool | str] = next(results_iter)
result: dict[str, bool | str] = self.process_file_worker(
file_path=file_path,
options=options,
)
if result["success"]:
success_count += 1
if options.get("verbose"):
progress_bar.write(f"{Fore.GREEN}{Style.RESET_ALL} {file_path.name}")
else:
failed_count += 1
reason = result.get("reason") if isinstance(result, dict) else None
reason: bool | str | None = result.get("reason") if isinstance(result, dict) else None
if reason:
progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name}{result['broken_dir']}/{file_path.name} ({reason})")
progress_bar.write(
f"{Fore.RED}{Style.RESET_ALL} "
f"{file_path.name}{result['broken_dir']}/"
f"{file_path.name} ({reason})",
)
else:
progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name}{result['broken_dir']}/{file_path.name}")
progress_bar.write(
f"{Fore.RED}{Style.RESET_ALL} "
f"{file_path.name}{result['broken_dir']}/"
f"{file_path.name}",
)
except (OSError, ValueError, KeyError) as e:
error_count += 1
progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name} (error: {e})")
@ -207,15 +750,27 @@ class Command(BaseCommand):
progress_bar.set_postfix_str(f"{success_count} | ✗ {failed_count + error_count}", refresh=True)
progress_bar.update(1)
self.print_processing_summary(json_files, success_count, failed_count, error_count)
self.print_processing_summary(
json_files,
success_count,
failed_count,
error_count,
)
def print_processing_summary(self, json_files: list[Path], success_count: int, failed_count: int, error_count: int) -> None:
def print_processing_summary(
self,
json_files: list[Path],
success_count: int,
failed_count: int,
error_count: int,
) -> None:
"""Print a summary of the batch processing results.
Args:
json_files: List of JSON file paths that were processed.
success_count: Number of files processed successfully.
failed_count: Number of files that failed validation and were moved.
failed_count: Number of files that failed validation and were
moved.
error_count: Number of files that encountered unexpected errors.
"""
tqdm.write("\n" + "=" * 50)
@ -227,7 +782,11 @@ class Command(BaseCommand):
tqdm.write(f"Total: {len(json_files)}")
tqdm.write("=" * 50)
def collect_json_files(self, options: dict, input_path: Path) -> list[Path]:
def collect_json_files(
self,
options: dict,
input_path: Path,
) -> list[Path]:
"""Collect JSON files from the specified directory.
Args:
@ -246,9 +805,12 @@ class Command(BaseCommand):
json_files = [f for f in input_path.iterdir() if f.is_file() and f.suffix == ".json"]
return json_files
def process_file_worker(
    self,
    file_path: Path,
    options: dict,
) -> dict[str, bool | str]:
    """Worker function for processing files.

    The diff residue mixed the removed @staticmethod version (direct
    schema validation, plain move) with the new instance-method flow;
    only the new flow remains here.

    Args:
        file_path: Path to the JSON file to process.
        options: Command options dictionary.

    Raises:
        ValidationError: If the JSON file fails validation and
            crash-on-error is enabled.
        json.JSONDecodeError: If the JSON file cannot be parsed and
            crash-on-error is enabled.

    Returns:
        Dict with success status and optional broken_dir path.
    """
    try:
        raw_text: str = file_path.read_text(encoding="utf-8", errors="ignore")
        # Fast pre-filter: check for known non-campaign keywords and move early
        matched: str | None = detect_non_campaign_keyword(raw_text)
        if matched:
            if not options.get("skip_broken_moves"):
                broken_dir: Path = move_file_to_broken_subdir(file_path, matched)
                return {"success": False, "broken_dir": str(broken_dir), "reason": f"matched '{matched}'"}
            return {"success": False, "broken_dir": "(skipped)", "reason": f"matched '{matched}'"}
        if "dropCampaign" not in raw_text:
            if not options.get("skip_broken_moves"):
                broken_dir = move_file_to_broken_subdir(file_path, "no_dropCampaign")
                return {"success": False, "broken_dir": str(broken_dir), "reason": "no dropCampaign present"}
            return {"success": False, "broken_dir": "(skipped)", "reason": "no dropCampaign present"}
        parsed_json: dict[str, Any] = json.loads(raw_text)
        operation_name: str | None = extract_operation_name_from_parsed(parsed_json)
        campaigns_found: list[dict[str, Any]] = [parsed_json]
        self.process_campaigns(
            campaigns_found=campaigns_found,
            file_path=file_path,
            options=options,
        )
        move_completed_file(file_path=file_path, operation_name=operation_name)
    except (ValidationError, json.JSONDecodeError):
        if options["crash_on_error"]:
            raise
        if not options.get("skip_broken_moves"):
            # parsed_json may not exist if json.loads itself failed.
            parsed_json_local: Any | None = locals().get("parsed_json")
            op_name: str | None = (
                extract_operation_name_from_parsed(parsed_json_local)
                if isinstance(parsed_json_local, (dict, list))
                else None
            )
            broken_dir: Path = move_failed_validation_file(file_path, operation_name=op_name)
            return {"success": False, "broken_dir": str(broken_dir)}
        return {"success": False, "broken_dir": "(skipped)"}
    else:
        return {"success": True}
@ -288,6 +873,7 @@ class Command(BaseCommand):
Raises:
ValidationError: If the JSON file fails validation
json.JSONDecodeError: If the JSON file cannot be parsed
"""
with tqdm(
total=1,
@ -299,19 +885,58 @@ class Command(BaseCommand):
try:
raw_text: str = file_path.read_text(encoding="utf-8", errors="ignore")
# Fast pre-filter for non-campaign responses
matched: str | None = detect_non_campaign_keyword(raw_text)
if matched:
broken_dir: Path = move_file_to_broken_subdir(file_path, matched)
progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name}{broken_dir}/{file_path.name} (matched '{matched}')")
if not options.get("skip_broken_moves"):
broken_dir: Path = move_file_to_broken_subdir(file_path, matched)
progress_bar.write(
f"{Fore.RED}{Style.RESET_ALL} {file_path.name}"
f"{broken_dir}/{file_path.name} "
f"(matched '{matched}')",
)
else:
progress_bar.write(
f"{Fore.RED}{Style.RESET_ALL} {file_path.name} (matched '{matched}', move skipped)",
)
return
_: ViewerDropsDashboardPayload = ViewerDropsDashboardPayload.model_validate_json(raw_text)
if "dropCampaign" not in raw_text:
if not options.get("skip_broken_moves"):
broken_dir = move_file_to_broken_subdir(file_path, "no_dropCampaign")
progress_bar.write(
f"{Fore.RED}{Style.RESET_ALL} {file_path.name}"
f"{broken_dir}/{file_path.name} "
f"(no dropCampaign present)",
)
else:
progress_bar.write(
f"{Fore.RED}{Style.RESET_ALL} {file_path.name} (no dropCampaign present, move skipped)",
)
return
parsed_json: dict[str, Any] = json.loads(raw_text)
operation_name: str | None = extract_operation_name_from_parsed(parsed_json)
campaigns_found: list[dict[str, Any]] = [parsed_json]
self.process_campaigns(campaigns_found=campaigns_found, file_path=file_path, options=options)
move_completed_file(file_path=file_path, operation_name=operation_name)
progress_bar.update(1)
progress_bar.write(f"{Fore.GREEN}{Style.RESET_ALL} {file_path.name}")
except ValidationError:
except (ValidationError, json.JSONDecodeError):
if options["crash_on_error"]:
raise
broken_dir: Path = move_failed_validation_file(file_path)
progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name}{broken_dir}/{file_path.name}")
if not options.get("skip_broken_moves"):
parsed_json_local: Any | None = locals().get("parsed_json")
op_name: str | None = (
extract_operation_name_from_parsed(parsed_json_local)
if isinstance(parsed_json_local, (dict, list))
else None
)
broken_dir: Path = move_failed_validation_file(file_path, operation_name=op_name)
progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name}{broken_dir}/{file_path.name}")
else:
progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name} (move skipped)")

View file

@ -52,7 +52,10 @@ def parse_date(value: str | None) -> datetime | None:
"RETURN_AS_TIMEZONE_AWARE": True,
"CACHE_SIZE_LIMIT": 0,
}
dt: datetime | None = dateparser.parse(date_string=value, settings=dateparser_settings) # pyright: ignore[reportArgumentType]
dt: datetime | None = dateparser.parse(
date_string=value,
settings=dateparser_settings, # pyright: ignore[reportArgumentType]
)
if not dt:
return None
@ -63,7 +66,7 @@ def parse_date(value: str | None) -> datetime | None:
class Command(BaseCommand):
"""Import Twitch drop campaign data from a JSON file or directory of JSON files."""
"""Import Twitch drop campaign data from JSON."""
help = "Import Twitch drop campaign data from a JSON file or directory"
requires_migrations_checks = True
@ -110,7 +113,7 @@ class Command(BaseCommand):
parser.add_argument(
"--no-preload",
action="store_true",
help="Do not preload existing DB objects into memory (default: preload).",
help="Do not preload existing DB objects into memory.",
)
def handle(self, **options) -> None:
@ -126,7 +129,6 @@ class Command(BaseCommand):
AttributeError: If expected attributes are missing in the data.
KeyError: If expected keys are missing in the data.
IndexError: If list indices are out of range in the data.
"""
paths: list[str] = options["paths"]
processed_dir: str = options["processed_dir"]
@ -136,7 +138,9 @@ class Command(BaseCommand):
# Preload DB objects into caches (unless disabled)
if not no_preload:
try:
self.stdout.write("Preloading existing database objects into memory...")
self.stdout.write(
"Preloading existing database objects into memory...",
)
self._preload_caches()
self.stdout.write(
f"Preloaded {len(self._game_cache)} games, "
@ -147,7 +151,8 @@ class Command(BaseCommand):
)
except (FileNotFoundError, OSError, RuntimeError):
# If preload fails for any reason, continue without it
self.stdout.write(self.style.WARNING("Preloading caches failed — continuing without preload."))
msg = "Warning: Preloading caches failed — continuing without preload."
self.stdout.write(self.style.WARNING(msg))
self.stdout.write(self.style.ERROR(traceback.format_exc()))
self._game_cache = {}
self._organization_cache = {}
@ -167,37 +172,77 @@ class Command(BaseCommand):
processed_path: Path = path / processed_dir
processed_path.mkdir(exist_ok=True)
self.process_drops(continue_on_error=continue_on_error, path=path, processed_path=processed_path)
self.process_drops(
continue_on_error=continue_on_error,
path=path,
processed_path=processed_path,
)
except CommandError as e:
if not continue_on_error:
raise
self.stdout.write(self.style.ERROR(f"Error processing path {p}: {e}"))
except (ValueError, TypeError, AttributeError, KeyError, IndexError):
self.stdout.write(
self.style.ERROR(f"Error processing path {p}: {e}"),
)
except (
ValueError,
TypeError,
AttributeError,
KeyError,
IndexError,
):
if not continue_on_error:
raise
self.stdout.write(self.style.ERROR(f"Data error processing path {p}"))
self.stdout.write(
self.style.ERROR(f"Data error processing path {p}"),
)
self.stdout.write(self.style.ERROR(traceback.format_exc()))
except KeyboardInterrupt:
# Gracefully handle Ctrl+C
self.stdout.write(self.style.WARNING("Interrupted by user, exiting import."))
self.stdout.write(
self.style.WARNING("Interrupted by user, exiting import."),
)
return
def _preload_caches(self) -> None:
"""Load existing DB objects into in-memory caches to avoid repeated queries."""
# These queries may be heavy if DB is huge — safe because optional via --no-preload
"""Load DB objects into in-memory caches to avoid repeated queries."""
with self._cache_locks["game"]:
self._game_cache = {str(g.twitch_id): g for g in Game.objects.all()}
with self._cache_locks["org"]:
self._organization_cache = {str(o.twitch_id): o for o in Organization.objects.all()}
with self._cache_locks["campaign"]:
self._drop_campaign_cache = {str(c.twitch_id): c for c in DropCampaign.objects.all()}
with self._cache_locks["channel"]:
self._channel_cache = {str(ch.twitch_id): ch for ch in Channel.objects.all()}
with self._cache_locks["benefit"]:
self._benefit_cache = {str(b.twitch_id): b for b in DropBenefit.objects.all()}
self._game_cache = {} # Clear existing cache
for game_instance in Game.objects.all():
twitch_id = str(game_instance.twitch_id)
self._game_cache[twitch_id] = game_instance
def process_drops(self, *, continue_on_error: bool, path: Path, processed_path: Path) -> None:
with self._cache_locks["org"]:
self._organization_cache = {}
for organization_instance in Organization.objects.all():
twitch_id = str(organization_instance.twitch_id)
self._organization_cache[twitch_id] = organization_instance
with self._cache_locks["campaign"]:
self._drop_campaign_cache = {}
for drop_campaign_instance in DropCampaign.objects.all():
twitch_id = str(drop_campaign_instance.twitch_id)
self._drop_campaign_cache[twitch_id] = drop_campaign_instance
with self._cache_locks["channel"]:
self._channel_cache = {}
for channel_instance in Channel.objects.all():
twitch_id = str(channel_instance.twitch_id)
self._channel_cache[twitch_id] = channel_instance
with self._cache_locks["benefit"]:
self._benefit_cache = {}
for benefit_instance in DropBenefit.objects.all():
twitch_id = str(benefit_instance.twitch_id)
self._benefit_cache[twitch_id] = benefit_instance
def process_drops(
self,
*,
continue_on_error: bool,
path: Path,
processed_path: Path,
) -> None:
"""Process drops from a file or directory.
Args:
@ -233,7 +278,13 @@ class Command(BaseCommand):
msg: str = f"Path {path} does not exist"
raise CommandError(msg)
def _process_directory(self, *, directory: Path, processed_path: Path, continue_on_error: bool) -> None:
def _process_directory(
self,
*,
directory: Path,
processed_path: Path,
continue_on_error: bool,
) -> None:
"""Process all JSON files in a directory using parallel processing.
Args:
@ -252,7 +303,9 @@ class Command(BaseCommand):
"""
json_files: list[Path] = list(directory.glob("*.json"))
if not json_files:
self.stdout.write(self.style.WARNING(f"No JSON files found in {directory}"))
self.stdout.write(
self.style.WARNING(f"No JSON files found in {directory}"),
)
return
total_files: int = len(json_files)
@ -261,10 +314,19 @@ class Command(BaseCommand):
with concurrent.futures.ThreadPoolExecutor() as executor:
try:
future_to_file: dict[concurrent.futures.Future[None], Path] = {
executor.submit(self._process_file, json_file, processed_path): json_file for json_file in json_files
executor.submit(
self._process_file,
json_file,
processed_path,
): json_file
for json_file in json_files
}
# Wrap the as_completed iterator with tqdm for a progress bar
for future in tqdm(concurrent.futures.as_completed(future_to_file), total=total_files, desc="Processing files"):
for future in tqdm(
concurrent.futures.as_completed(future_to_file),
total=total_files,
desc="Processing files",
):
json_file: Path = future_to_file[future]
try:
future.result()
@ -273,20 +335,42 @@ class Command(BaseCommand):
# To stop all processing, we shut down the executor and re-raise
executor.shutdown(wait=False, cancel_futures=True)
raise
self.stdout.write(self.style.ERROR(f"Error processing {json_file}: {e}"))
except (ValueError, TypeError, AttributeError, KeyError, IndexError):
self.stdout.write(
self.style.ERROR(
f"Error processing {json_file}: {e}",
),
)
except (
ValueError,
TypeError,
AttributeError,
KeyError,
IndexError,
):
if not continue_on_error:
# To stop all processing, we shut down the executor and re-raise
executor.shutdown(wait=False, cancel_futures=True)
raise
self.stdout.write(self.style.ERROR(f"Data error processing {json_file}"))
self.stdout.write(self.style.ERROR(traceback.format_exc()))
self.stdout.write(
self.style.ERROR(
f"Data error processing {json_file}",
),
)
self.stdout.write(
self.style.ERROR(traceback.format_exc()),
)
msg: str = f"Processed {total_files} JSON files in {directory}. Moved processed files to {processed_path}."
msg: str = (
f"Processed {total_files} JSON files in {directory}. Moved processed files to {processed_path}."
)
self.stdout.write(self.style.SUCCESS(msg))
except KeyboardInterrupt:
self.stdout.write(self.style.WARNING("Interruption received, shutting down threads immediately..."))
self.stdout.write(
self.style.WARNING(
"Interruption received, shutting down threads immediately...",
),
)
executor.shutdown(wait=False, cancel_futures=True)
# Re-raise the exception to allow the main `handle` method to catch it and exit
raise
@ -331,7 +415,9 @@ class Command(BaseCommand):
target_dir.mkdir(parents=True, exist_ok=True)
self.move_file(file_path, target_dir / file_path.name)
tqdm.write(f"Moved {file_path} to {target_dir} (matched '{keyword}')")
tqdm.write(
f"Moved {file_path} to {target_dir} (matched '{keyword}')",
)
return
# Some responses have errors:
@ -341,7 +427,9 @@ class Command(BaseCommand):
actual_error_dir: Path = processed_path / "actual_error"
actual_error_dir.mkdir(parents=True, exist_ok=True)
self.move_file(file_path, actual_error_dir / file_path.name)
tqdm.write(f"Moved {file_path} to {actual_error_dir} (contains Twitch errors)")
tqdm.write(
f"Moved {file_path} to {actual_error_dir} (contains Twitch errors)",
)
return
# If file has "__typename": "BroadcastSettings" move it to the "broadcast_settings" directory
@ -360,7 +448,9 @@ class Command(BaseCommand):
and data["data"]["channel"]["viewerDropCampaigns"] is None
):
file_path.unlink()
tqdm.write(f"Removed {file_path} (only contains empty viewerDropCampaigns)")
tqdm.write(
f"Removed {file_path} (only contains empty viewerDropCampaigns)",
)
return
# If file only contains {"data": {"user": null}} remove the file
@ -377,11 +467,18 @@ class Command(BaseCommand):
tqdm.write(f"Removed {file_path} (only contains game data)")
return
# If file has "__typename": "DropCurrentSession" move it to the "drop_current_session" directory so we can process it separately.
if isinstance(data, dict) and data.get("data", {}).get("currentUser", {}).get("dropCurrentSession", {}).get("__typename") == "DropCurrentSession":
# If file has "__typename": "DropCurrentSession" move it to the "drop_current_session" directory so we can process it separately. # noqa: E501
if (
isinstance(data, dict)
and data.get("data", {}).get("currentUser", {}).get("dropCurrentSession", {}).get("__typename")
== "DropCurrentSession"
):
drop_current_session_dir: Path = processed_path / "drop_current_session"
drop_current_session_dir.mkdir(parents=True, exist_ok=True)
self.move_file(file_path, drop_current_session_dir / file_path.name)
self.move_file(
file_path,
drop_current_session_dir / file_path.name,
)
return
# If file is a list with one item: {"data": {"user": null}}, remove it
@ -407,7 +504,10 @@ class Command(BaseCommand):
# Move file to "we_should_double_check" directory for manual review
we_should_double_check_dir: Path = processed_path / "we_should_double_check"
we_should_double_check_dir.mkdir(parents=True, exist_ok=True)
self.move_file(file_path, we_should_double_check_dir / file_path.name)
self.move_file(
file_path,
we_should_double_check_dir / file_path.name,
)
raise CommandError(msg)
self.move_file(file_path, processed_path)
@ -426,19 +526,33 @@ class Command(BaseCommand):
if f1.read() != f2.read():
new_name: Path = processed_path / f"{file_path.stem}_duplicate{file_path.suffix}"
shutil.move(str(file_path), str(new_name))
tqdm.write(f"Moved {file_path!s} to {new_name!s} (content differs)")
tqdm.write(
f"Moved {file_path!s} to {new_name!s} (content differs)",
)
else:
tqdm.write(f"{file_path!s} already exists in {processed_path!s}, removing original file.")
tqdm.write(
f"{file_path!s} already exists in {processed_path!s}, removing original file.",
)
file_path.unlink()
except FileNotFoundError:
tqdm.write(f"{file_path!s} not found when handling duplicate case, skipping.")
tqdm.write(
f"{file_path!s} not found when handling duplicate case, skipping.",
)
except FileNotFoundError:
tqdm.write(f"{file_path!s} not found, skipping.")
except (PermissionError, OSError, shutil.Error) as e:
self.stdout.write(self.style.ERROR(f"Error moving {file_path!s} to {processed_path!s}: {e}"))
self.stdout.write(
self.style.ERROR(
f"Error moving {file_path!s} to {processed_path!s}: {e}",
),
)
traceback.print_exc()
def import_drop_campaign(self, data: dict[str, Any], file_path: Path) -> None:
def import_drop_campaign(
self,
data: dict[str, Any],
file_path: Path,
) -> None:
"""Find and import drop campaign data from various JSON structures."""
# Add this check: If this is a known "empty" response, ignore it silently.
if (
@ -475,7 +589,9 @@ class Command(BaseCommand):
# Structure: {"data": {"currentUser": {"inventory": {"dropCampaignsInProgress": [...]}}}}
if "inventory" in current_user and "dropCampaignsInProgress" in current_user["inventory"]:
campaigns_found.extend(current_user["inventory"]["dropCampaignsInProgress"])
campaigns_found.extend(
current_user["inventory"]["dropCampaignsInProgress"],
)
# Structure: {"data": {"channel": {"viewerDropCampaigns": [...]}}}
if "channel" in d and d["channel"] and "viewerDropCampaigns" in d["channel"]:
@ -507,9 +623,17 @@ class Command(BaseCommand):
self.import_to_db(data, file_path=file_path)
return
tqdm.write(self.style.WARNING(f"No valid drop campaign data found in {file_path.name}"))
tqdm.write(
self.style.WARNING(
f"No valid drop campaign data found in {file_path.name}",
),
)
def import_to_db(self, campaign_data: dict[str, Any], file_path: Path) -> None:
def import_to_db(
self,
campaign_data: dict[str, Any],
file_path: Path,
) -> None:
"""Import drop campaign data into the database with retry logic for SQLite locks.
Args:
@ -517,25 +641,51 @@ class Command(BaseCommand):
file_path: The path to the file being processed.
"""
with transaction.atomic():
game: Game = self.game_update_or_create(campaign_data=campaign_data)
organization: Organization | None = self.owner_update_or_create(campaign_data=campaign_data)
game: Game = self.game_update_or_create(
campaign_data=campaign_data,
)
organization: Organization | None = self.owner_update_or_create(
campaign_data=campaign_data,
)
if organization and game.owner != organization:
game.owner = organization
game.save(update_fields=["owner"])
drop_campaign: DropCampaign = self.drop_campaign_update_or_get(campaign_data=campaign_data, game=game)
drop_campaign: DropCampaign = self.drop_campaign_update_or_get(
campaign_data=campaign_data,
game=game,
)
for drop_data in campaign_data.get("timeBasedDrops", []):
self._process_time_based_drop(drop_data, drop_campaign, file_path)
self._process_time_based_drop(
drop_data,
drop_campaign,
file_path,
)
def _process_time_based_drop(self, drop_data: dict[str, Any], drop_campaign: DropCampaign, file_path: Path) -> None:
time_based_drop: TimeBasedDrop = self.create_time_based_drop(drop_campaign=drop_campaign, drop_data=drop_data)
def _process_time_based_drop(
self,
drop_data: dict[str, Any],
drop_campaign: DropCampaign,
file_path: Path,
) -> None:
time_based_drop: TimeBasedDrop = self.create_time_based_drop(
drop_campaign=drop_campaign,
drop_data=drop_data,
)
benefit_edges: list[dict[str, Any]] = drop_data.get("benefitEdges", [])
if not benefit_edges:
tqdm.write(self.style.WARNING(f"No benefit edges found for drop {time_based_drop.name} (ID: {time_based_drop.twitch_id})"))
self.move_file(file_path, Path("no_benefit_edges") / file_path.name)
tqdm.write(
self.style.WARNING(
f"No benefit edges found for drop {time_based_drop.name} (ID: {time_based_drop.twitch_id})",
),
)
self.move_file(
file_path,
Path("no_benefit_edges") / file_path.name,
)
return
for benefit_edge in benefit_edges:
@ -558,14 +708,22 @@ class Command(BaseCommand):
benefit_defaults = {k: v for k, v in benefit_defaults.items() if v is not None}
# Use cached create/update for benefits
benefit = self._get_or_create_benefit(benefit_data["id"], benefit_defaults)
benefit = self._get_or_create_benefit(
benefit_data["id"],
benefit_defaults,
)
try:
with transaction.atomic():
drop_benefit_edge, created = DropBenefitEdge.objects.update_or_create(
drop=time_based_drop,
benefit=benefit,
defaults={"entitlement_limit": benefit_edge.get("entitlementLimit", 1)},
defaults={
"entitlement_limit": benefit_edge.get(
"entitlementLimit",
1,
),
},
)
if created:
tqdm.write(f"Added {drop_benefit_edge}")
@ -573,10 +731,14 @@ class Command(BaseCommand):
msg = f"Error: Multiple DropBenefitEdge objects found for drop {time_based_drop.twitch_id} and benefit {benefit.twitch_id}. Cannot update or create." # noqa: E501
raise CommandError(msg) from e
except (IntegrityError, DatabaseError, TypeError, ValueError) as e:
msg = f"Database or validation error creating DropBenefitEdge for drop {time_based_drop.twitch_id} and benefit {benefit.twitch_id}: {e}"
msg = f"Database or validation error creating DropBenefitEdge for drop {time_based_drop.twitch_id} and benefit {benefit.twitch_id}: {e}" # noqa: E501
raise CommandError(msg) from e
def create_time_based_drop(self, drop_campaign: DropCampaign, drop_data: dict[str, Any]) -> TimeBasedDrop:
def create_time_based_drop(
self,
drop_campaign: DropCampaign,
drop_data: dict[str, Any],
) -> TimeBasedDrop:
"""Creates or updates a TimeBasedDrop instance based on the provided drop data.
Args:
@ -598,7 +760,9 @@ class Command(BaseCommand):
time_based_drop_defaults: dict[str, Any] = {
"campaign": drop_campaign,
"name": drop_data.get("name"),
"required_minutes_watched": drop_data.get("requiredMinutesWatched"),
"required_minutes_watched": drop_data.get(
"requiredMinutesWatched",
),
"required_subs": drop_data.get("requiredSubs"),
"start_at": parse_date(drop_data.get("startAt")),
"end_at": parse_date(drop_data.get("endAt")),
@ -614,7 +778,10 @@ class Command(BaseCommand):
try:
with transaction.atomic():
time_based_drop, created = TimeBasedDrop.objects.update_or_create(id=drop_data["id"], defaults=time_based_drop_defaults)
time_based_drop, created = TimeBasedDrop.objects.update_or_create(
id=drop_data["id"],
defaults=time_based_drop_defaults,
)
if created:
tqdm.write(f"Added {time_based_drop}")
except MultipleObjectsReturned as e:
@ -652,7 +819,10 @@ class Command(BaseCommand):
lock = self._cache_locks.get(model_name)
if lock is None:
# Fallback for models without a dedicated cache/lock
obj, created = model_class.objects.update_or_create(id=obj_id, defaults=defaults)
obj, created = model_class.objects.update_or_create(
id=obj_id,
defaults=defaults,
)
if created:
tqdm.write(f"Added {obj}")
return obj
@ -672,7 +842,10 @@ class Command(BaseCommand):
# Use get_or_create which is safer in a race. It might still fail if two threads
# try to create at the exact same time, so we wrap it.
try:
obj, created = model_class.objects.get_or_create(id=obj_id, defaults=defaults)
obj, created = model_class.objects.get_or_create(
id=obj_id,
defaults=defaults,
)
except IntegrityError:
# Another thread created it between our `get` and `create` attempt.
# The object is guaranteed to exist now, so we can just fetch it.
@ -700,8 +873,17 @@ class Command(BaseCommand):
return obj
def _get_or_create_benefit(self, benefit_id: str | int, defaults: dict[str, Any]) -> DropBenefit:
return self._get_or_create_cached("benefit", DropBenefit, benefit_id, defaults) # pyright: ignore[reportReturnType]
def _get_or_create_benefit(
self,
benefit_id: str | int,
defaults: dict[str, Any],
) -> DropBenefit:
return self._get_or_create_cached(
"benefit",
DropBenefit,
benefit_id,
defaults,
) # pyright: ignore[reportReturnType]
def game_update_or_create(self, campaign_data: dict[str, Any]) -> Game:
"""Update or create a game with caching.
@ -726,11 +908,13 @@ class Command(BaseCommand):
# Filter out None values to avoid overwriting with them
game_defaults = {k: v for k, v in game_defaults.items() if v is not None}
game: Game | Organization | DropCampaign | Channel | DropBenefit | str | int | None = self._get_or_create_cached(
model_name="game",
model_class=Game,
obj_id=game_data["id"],
defaults=game_defaults,
game: Game | Organization | DropCampaign | Channel | DropBenefit | str | int | None = (
self._get_or_create_cached(
model_name="game",
model_class=Game,
obj_id=game_data["id"],
defaults=game_defaults,
)
)
if not isinstance(game, Game):
msg = "Expected a Game instance from _get_or_create_cached"
@ -738,7 +922,10 @@ class Command(BaseCommand):
return game
def owner_update_or_create(self, campaign_data: dict[str, Any]) -> Organization | None:
def owner_update_or_create(
self,
campaign_data: dict[str, Any],
) -> Organization | None:
"""Update or create an organization with caching.
Args:
@ -768,7 +955,11 @@ class Command(BaseCommand):
return owner
return None
def drop_campaign_update_or_get(self, campaign_data: dict[str, Any], game: Game) -> DropCampaign:
def drop_campaign_update_or_get(
self,
campaign_data: dict[str, Any],
game: Game,
) -> DropCampaign:
"""Update or create a drop campaign with caching and channel handling.
Args:
@ -791,9 +982,18 @@ class Command(BaseCommand):
"details_url": campaign_data.get("detailsURL"),
"account_link_url": campaign_data.get("accountLinkURL"),
"image_url": campaign_data.get("imageURL"),
"start_at": parse_date(campaign_data.get("startAt") or campaign_data.get("startsAt")),
"end_at": parse_date(campaign_data.get("endAt") or campaign_data.get("endsAt")),
"is_account_connected": campaign_data.get("self", {}).get("isAccountConnected"),
"start_at": parse_date(
campaign_data.get("startAt") or campaign_data.get("startsAt"),
),
"end_at": parse_date(
campaign_data.get("endAt") or campaign_data.get("endsAt"),
),
"is_account_connected": (
campaign_data.get(
"self",
{},
).get("isAccountConnected")
),
"allow_is_enabled": allow_is_enabled,
}
@ -846,7 +1046,9 @@ class Command(BaseCommand):
channel_objects.append(channel)
# Set the many-to-many relationship (save only if different)
current_ids = set(drop_campaign.allow_channels.values_list("id", flat=True))
current_ids = set(
drop_campaign.allow_channels.values_list("id", flat=True),
)
new_ids = {ch.twitch_id for ch in channel_objects}
if current_ids != new_ids:
drop_campaign.allow_channels.set(channel_objects)