This commit is contained in:
Joakim Hellsén 2025-12-01 22:06:53 +01:00
commit a1aafa3ed2
No known key found for this signature in database
2 changed files with 101 additions and 46 deletions

View file

@ -15,6 +15,8 @@ dependencies = [
"pygments>=2.19.2", "pygments>=2.19.2",
"httpx>=0.28.1", "httpx>=0.28.1",
"pydantic>=2.12.5", "pydantic>=2.12.5",
"tqdm>=4.67.1",
"colorama>=0.4.6",
] ]
[dependency-groups] [dependency-groups]

View file

@ -3,13 +3,18 @@ from __future__ import annotations
import os import os
import sys import sys
from concurrent.futures import ProcessPoolExecutor from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import as_completed from itertools import repeat
from pathlib import Path from pathlib import Path
from typing import Any
from colorama import Fore
from colorama import Style
from colorama import init as colorama_init
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.core.management.base import CommandError from django.core.management.base import CommandError
from django.core.management.base import CommandParser from django.core.management.base import CommandParser
from pydantic import ValidationError from pydantic import ValidationError
from tqdm import tqdm
from twitch.models import Channel from twitch.models import Channel
from twitch.models import DropBenefit from twitch.models import DropBenefit
@ -54,24 +59,26 @@ class Command(BaseCommand):
parser.add_argument("path", type=str, help="Path to JSON file or directory") parser.add_argument("path", type=str, help="Path to JSON file or directory")
parser.add_argument("--recursive", action="store_true", help="Recursively search directories for JSON files") parser.add_argument("--recursive", action="store_true", help="Recursively search directories for JSON files")
parser.add_argument("--crash-on-error", action="store_true", help="Crash the command on first error instead of continuing") parser.add_argument("--crash-on-error", action="store_true", help="Crash the command on first error instead of continuing")
parser.add_argument("--verbose", action="store_true", help="Print per-file success messages (very chatty)")
def pre_fill_cache(self) -> None: def pre_fill_cache(self) -> None:
"""Load all existing IDs from DB into memory to avoid N+1 queries.""" """Load all existing IDs from DB into memory to avoid N+1 queries."""
self.stdout.write("Pre-filling caches...") cache_operations: list[tuple[str, type, str]] = [
self.game_cache = {str(g.twitch_id): g for g in Game.objects.all()} ("Games", Game, "game_cache"),
self.stdout.write(f"\tGames: {len(self.game_cache)}") ("Organizations", Organization, "organization_cache"),
("Drop Campaigns", DropCampaign, "drop_campaign_cache"),
("Channels", Channel, "channel_cache"),
("Benefits", DropBenefit, "benefit_cache"),
]
self.organization_cache = {str(o.twitch_id): o for o in Organization.objects.all()} with tqdm(cache_operations, desc="Loading caches", unit="cache", colour="cyan") as progress_bar:
self.stdout.write(f"\tOrganizations: {len(self.organization_cache)}") for name, model, cache_attr in progress_bar:
progress_bar.set_description(f"Loading {name}")
cache: dict[str, Any] = {str(obj.twitch_id): obj for obj in model.objects.all()}
setattr(self, cache_attr, cache)
progress_bar.write(f" {Fore.GREEN}{Style.RESET_ALL} {name}: {len(cache):,}")
self.drop_campaign_cache = {str(c.twitch_id): c for c in DropCampaign.objects.all()} tqdm.write("")
self.stdout.write(f"\tDrop Campaigns: {len(self.drop_campaign_cache)}")
self.channel_cache = {str(ch.twitch_id): ch for ch in Channel.objects.all()}
self.stdout.write(f"\tChannels: {len(self.channel_cache)}")
self.benefit_cache = {str(b.twitch_id): b for b in DropBenefit.objects.all()}
self.stdout.write(f"\tBenefits: {len(self.benefit_cache)}")
def handle(self, *args, **options) -> None: # noqa: ARG002 def handle(self, *args, **options) -> None: # noqa: ARG002
"""Main entry point for the command. """Main entry point for the command.
@ -79,6 +86,8 @@ class Command(BaseCommand):
Raises: Raises:
CommandError: If the provided path does not exist. CommandError: If the provided path does not exist.
""" """
colorama_init(autoreset=True)
input_path: Path = Path(options["path"]).resolve() input_path: Path = Path(options["path"]).resolve()
self.pre_fill_cache() self.pre_fill_cache()
@ -92,8 +101,8 @@ class Command(BaseCommand):
msg: str = f"Path does not exist: {input_path}" msg: str = f"Path does not exist: {input_path}"
raise CommandError(msg) raise CommandError(msg)
except KeyboardInterrupt: except KeyboardInterrupt:
self.stdout.write(self.style.WARNING("\n\nInterrupted by user!")) tqdm.write(self.style.WARNING("\n\nInterrupted by user!"))
self.stdout.write(self.style.WARNING("Shutting down gracefully...")) tqdm.write(self.style.WARNING("Shutting down gracefully..."))
sys.exit(130) sys.exit(130)
def process_json_files(self, input_path: Path, options: dict) -> None: def process_json_files(self, input_path: Path, options: dict) -> None:
@ -104,28 +113,66 @@ class Command(BaseCommand):
options: Command options options: Command options
""" """
json_files: list[Path] = self.collect_json_files(options, input_path) json_files: list[Path] = self.collect_json_files(options, input_path)
self.stdout.write(f"Found {len(json_files)} JSON files to process") tqdm.write(f"Found {len(json_files)} JSON files to process\n")
completed_count = 0 success_count = 0
with ProcessPoolExecutor() as executor: failed_count = 0
futures = {executor.submit(self.process_file_worker, file_path, options): file_path for file_path in json_files} error_count = 0
for future in as_completed(futures): with (
file_path: Path = futures[future] ProcessPoolExecutor() as executor,
tqdm(
total=len(json_files),
desc="Processing",
unit="file",
bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]",
colour="green",
dynamic_ncols=True,
) as progress_bar,
):
# Choose a reasonable chunk_size to reduce overhead for huge file counts
cpu_count = os.cpu_count() or 1
chunk_size = max(1, min(1000, len(json_files) // (cpu_count * 8 or 1)))
results_iter = executor.map(self.process_file_worker, json_files, repeat(options), chunksize=chunk_size)
for file_path in json_files:
try: try:
result: dict[str, bool | str] = future.result() result: dict[str, bool | str] = next(results_iter)
if result["success"]: if result["success"]:
self.stdout.write(f"{file_path}") success_count += 1
if options.get("verbose"):
progress_bar.write(f"{Fore.GREEN}{Style.RESET_ALL} {file_path.name}")
else: else:
self.stdout.write(f"{file_path} -> {result['broken_dir']}/{file_path.name}") failed_count += 1
progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name}{result['broken_dir']}/{file_path.name}")
completed_count += 1
except (OSError, ValueError, KeyError) as e: except (OSError, ValueError, KeyError) as e:
self.stdout.write(f"{file_path} (error: {e})") error_count += 1
completed_count += 1 progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name} (error: {e})")
self.stdout.write(f"Progress: {completed_count}/{len(json_files)} files processed") # Update postfix with statistics
self.stdout.write("") progress_bar.set_postfix_str(f"{success_count} | ✗ {failed_count + error_count}", refresh=True)
progress_bar.update(1)
self.print_processing_summary(json_files, success_count, failed_count, error_count)
def print_processing_summary(self, json_files: list[Path], success_count: int, failed_count: int, error_count: int) -> None:
    """Emit a framed summary of the batch run once every file has been handled.

    Args:
        json_files: Every JSON file path that was processed in this run.
        success_count: Files that validated and imported cleanly.
        failed_count: Files that failed Pydantic validation and were moved aside.
        error_count: Files that hit unexpected runtime errors during processing.
    """
    # tqdm.write keeps the summary from mangling any still-active progress bar.
    divider = "=" * 50
    tqdm.write("\n" + divider)
    tqdm.write(self.style.SUCCESS(f"✓ Successfully processed: {success_count}"))
    # Only surface the failure lines when there is actually something to report.
    if failed_count > 0:
        tqdm.write(self.style.WARNING(f"✗ Validation failed: {failed_count}"))
    if error_count > 0:
        tqdm.write(self.style.ERROR(f"✗ Errors: {error_count}"))
    tqdm.write(f"Total: {len(json_files)}")
    tqdm.write(divider)
def collect_json_files(self, options: dict, input_path: Path) -> list[Path]: def collect_json_files(self, options: dict, input_path: Path) -> list[Path]:
"""Collect JSON files from the specified directory. """Collect JSON files from the specified directory.
@ -181,14 +228,20 @@ class Command(BaseCommand):
Raises: Raises:
ValidationError: If the JSON file fails validation ValidationError: If the JSON file fails validation
""" """
self.stdout.write(f"Processing file: {file_path}") with tqdm(
total=1,
desc=f"Processing {file_path.name}",
unit="file",
colour="green",
dynamic_ncols=True,
) as progress_bar:
try: try:
_: ViewerDropsDashboardPayload = ViewerDropsDashboardPayload.model_validate_json(file_path.read_text(encoding="utf-8")) _: ViewerDropsDashboardPayload = ViewerDropsDashboardPayload.model_validate_json(file_path.read_text(encoding="utf-8"))
self.stdout.write("\tProcessed drop campaigns") progress_bar.update(1)
progress_bar.write(f"{Fore.GREEN}{Style.RESET_ALL} {file_path.name}")
except ValidationError: except ValidationError:
if options["crash_on_error"]: if options["crash_on_error"]:
raise raise
broken_dir: Path = move_failed_validation_file(file_path) broken_dir: Path = move_failed_validation_file(file_path)
self.stdout.write(f"\tMoved to {broken_dir} (validation failed)") progress_bar.write(f"{Fore.RED}{Style.RESET_ALL} {file_path.name}{broken_dir}/{file_path.name}")