Refactor import command

This commit is contained in:
Joakim Hellsén 2025-07-31 23:53:57 +02:00
commit 1a015f102c
7 changed files with 135 additions and 284 deletions

View file

@ -1,165 +0,0 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from django.core.management.base import BaseCommand, CommandParser
class Command(BaseCommand):
    """Django management command to clean response files that only contain PlaybackAccessToken data.

    This command scans JSON files in the specified directory and moves those that only contain
    PlaybackAccessToken data (and no other meaningful content) into a separate directory.
    With ``--dry-run`` the files are only reported, nothing is moved.
    """

    help = "Cleans response files that only contain PlaybackAccessToken data"

    def add_arguments(self, parser: CommandParser) -> None:
        """Add command line arguments to the parser.

        Parameters:
        ----------
        parser : CommandParser
            The command argument parser
        """
        parser.add_argument("--dir", type=str, default="responses", help="Directory containing the response files to clean")
        parser.add_argument(
            "--deleted-dir",
            type=str,
            default=None,
            help="Directory to move files to instead of deleting them (defaults to '<dir>/deleted')",
        )
        # handle() has always looked up "dry_run"; register the flag so it can actually be set.
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Only report which files would be moved, without moving anything",
        )

    def is_playback_token_only(self, data: dict[str, Any]) -> bool:
        """Determine if a JSON data structure only contains PlaybackAccessToken data.

        Parameters:
        ----------
        data : dict[str, Any]
            The decoded JSON document to check. Non-dict documents (lists, scalars) never match.

        Returns:
        -------
        bool
            True if the data only contains PlaybackAccessToken data, False otherwise
        """
        if not isinstance(data, dict):
            return False

        payload = data.get("data")

        # Case 1: "data" holds exactly one key, streamPlaybackAccessToken, typed as PlaybackAccessToken.
        if isinstance(payload, dict) and len(payload) == 1:
            token = payload.get("streamPlaybackAccessToken")
            if isinstance(token, dict) and token.get("__typename") == "PlaybackAccessToken":
                self.stdout.write(f"Found PlaybackAccessToken only in {token['__typename']}")
                return True

        # Case 2: the operation name in extensions is PlaybackAccessToken and there is no other data.
        extensions = data.get("extensions")
        if not isinstance(extensions, dict) or extensions.get("operationName") != "PlaybackAccessToken":
            return False

        # A missing or null "data" member carries nothing else; len() on None would raise TypeError.
        if payload is None:
            return True
        return isinstance(payload, (dict, list, str)) and len(payload) <= 1

    @staticmethod
    def _unique_target(directory: Path, name: str) -> Path:
        """Return a path inside ``directory`` for ``name`` that does not collide with an existing file.

        On a collision, a trailing ``_<number>`` on the stem is incremented; otherwise ``_1`` is appended.
        """
        candidate = directory / name
        suffix = candidate.suffix
        while candidate.exists():
            base, sep, tail = candidate.stem.rpartition("_")
            if sep and base and tail.isdecimal():
                candidate = directory / f"{base}_{int(tail) + 1}{suffix}"
            else:
                candidate = directory / f"{candidate.stem}_1{suffix}"
        return candidate

    def process_file(self, file_path: Path, *, deleted_dir: Path, dry_run: bool = False) -> bool:
        """Process a single JSON file to check if it only contains PlaybackAccessToken data.

        Parameters:
        ----------
        file_path : Path
            The path to the JSON file
        deleted_dir : Path, keyword-only
            Directory to move files to instead of deleting them
        dry_run : bool, keyword-only
            When True, only report the move that would happen

        Returns:
        -------
        bool
            True if the file was (or would be) moved, False otherwise
        """
        try:
            data = json.loads(file_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Undecodable bytes are just another form of unparseable JSON; do not abort the whole run.
            self.stderr.write(self.style.WARNING(f"Error parsing JSON in {file_path}"))
            return False
        except OSError as e:
            self.stderr.write(self.style.ERROR(f"IO error processing {file_path}: {e!s}"))
            return False

        if not self.is_playback_token_only(data):
            return False

        target_file = self._unique_target(deleted_dir, file_path.name)
        if dry_run:
            self.stdout.write(f"Would move: {file_path} to {target_file}")
            return True

        try:
            deleted_dir.mkdir(parents=True, exist_ok=True)
            file_path.rename(target_file)
        except OSError as e:
            self.stderr.write(self.style.ERROR(f"IO error processing {file_path}: {e!s}"))
            return False

        self.stdout.write(f"Moved: {file_path} to {target_file}")
        return True

    def handle(self, **options: Any) -> None:
        """Execute the command to clean response files.

        Parameters:
        ----------
        **options : Any
            Parsed command options ("dir", "deleted_dir", "dry_run")
        """
        directory = str(options["dir"])
        dry_run = bool(options.get("dry_run"))
        deleted_dir_path = options.get("deleted_dir")

        # Set up the base directory for processing
        base_dir = Path(directory)
        if not base_dir.exists():
            self.stderr.write(self.style.ERROR(f"Directory {directory} does not exist"))
            return

        # Set up the deleted directory (never created during a dry run)
        deleted_dir: Path = Path(str(deleted_dir_path)) if deleted_dir_path else base_dir / "deleted"
        if not dry_run and not deleted_dir.exists():
            deleted_dir.mkdir(parents=True, exist_ok=True)
            self.stdout.write(f"Created directory for moved files: {deleted_dir}")

        file_count = 0
        moved_count = 0

        # Materialise the listing first: files are moved while we loop, and a lazy glob
        # over a directory tree that is being modified is not reliable.
        for file_path in sorted(base_dir.glob("**/*.json")):
            # Skip files in the deleted directory (Path.parents already includes the direct parent)
            if deleted_dir in file_path.parents:
                continue
            if not file_path.is_file():
                continue
            file_count += 1
            if self.process_file(file_path, deleted_dir=deleted_dir, dry_run=dry_run):
                moved_count += 1

        # Report the results
        verb = "would move" if dry_run else "moved"
        self.stdout.write(
            self.style.SUCCESS(f"Cleanup completed: Processed {file_count} files, {verb} {moved_count} files to {deleted_dir}")
        )

View file

@ -2,9 +2,11 @@ from __future__ import annotations
import json
import shutil
import traceback
from pathlib import Path
from typing import Any
import orjson
from django.core.management.base import BaseCommand, CommandError, CommandParser
from django.db import transaction
@ -15,6 +17,7 @@ class Command(BaseCommand):
"""Import Twitch drop campaign data from a JSON file or directory of JSON files."""
help = "Import Twitch drop campaign data from a JSON file or directory"
requires_migrations_checks = True
def add_arguments(self, parser: CommandParser) -> None:
"""Add command arguments.
@ -74,12 +77,17 @@ class Command(BaseCommand):
self._process_file(json_file, processed_path)
except CommandError as e:
self.stdout.write(self.style.ERROR(f"Error processing {json_file}: {e}"))
except (ValueError, TypeError, AttributeError, KeyError, IndexError, json.JSONDecodeError) as e:
self.stdout.write(self.style.ERROR(f"Data error processing {json_file}: {e!s}"))
except (orjson.JSONDecodeError, json.JSONDecodeError):
broken_json_dir: Path = processed_path / "broken_json"
broken_json_dir.mkdir(exist_ok=True)
self.stdout.write(self.style.WARNING(f"Invalid JSON in '{json_file}'. Moving to '{broken_json_dir}'."))
shutil.move(str(json_file), str(broken_json_dir))
except (ValueError, TypeError, AttributeError, KeyError, IndexError):
self.stdout.write(self.style.ERROR(f"Data error processing {json_file}"))
self.stdout.write(self.style.ERROR(traceback.format_exc()))
self.stdout.write(
self.style.SUCCESS(f"Completed processing {total_files} JSON files in {directory}. Processed files moved to {processed_path}.")
)
msg: str = f"Processed {total_files} JSON files in {directory}. Moved processed files to {processed_path}."
self.stdout.write(self.style.SUCCESS(msg))
def _process_file(self, file_path: Path, processed_path: Path) -> None:
"""Process a single JSON file.
@ -91,8 +99,27 @@ class Command(BaseCommand):
Raises:
CommandError: If the file isn't a JSON file or has invalid JSON structure.
"""
with file_path.open(encoding="utf-8") as f:
data = json.load(f)
data = orjson.loads(file_path.read_text(encoding="utf-8"))
broken_dir: Path = processed_path / "broken"
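# Move responses that only contain PlaybackAccessToken data into the "broken" directory.
# NOTE(review): the `except KeyError: return` below appears to return early for every dict payload
# that lacks data.streamPlaybackAccessToken or extensions.operationName - confirm this is intended.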
if not isinstance(data, list):
try:
token = data["data"]["streamPlaybackAccessToken"]
if token["__typename"] == "PlaybackAccessToken" and len(data["data"]) == 1:
shutil.move(str(file_path), str(broken_dir))
self.stdout.write(f"Moved {file_path} to {broken_dir}. This file only contains PlaybackAccessToken data.")
return
if data["extensions"]["operationName"] == "PlaybackAccessToken" and ("data" not in data or len(data["data"]) <= 1):
shutil.move(str(file_path), str(broken_dir))
self.stdout.write(f"Moved {file_path} to {broken_dir}. This file only contains PlaybackAccessToken data.")
return
except KeyError:
return
# Move DropsHighlightService_AvailableDrops to its own dir
# TODO(TheLovinator): Check if we should import this # noqa: TD003
if isinstance(data, list):
for item in data:

View file

@ -1,68 +0,0 @@
from __future__ import annotations
import json
import shutil
from pathlib import Path
from django.core.management.base import BaseCommand, CommandError, CommandParser
class Command(BaseCommand):
    """Validate JSON files and move invalid ones to an error directory."""

    help = "Validate JSON files and move invalid ones to an error directory."

    def add_arguments(self, parser: CommandParser) -> None:
        """Add command arguments.

        Args:
            parser: The command argument parser.
        """
        parser.add_argument(
            "path",
            type=str,
            help="Path to the directory containing JSON files to validate.",
        )
        parser.add_argument(
            "--error-dir",
            type=str,
            default="error",
            help="Name of subdirectory to move files with JSON errors to (default: 'error')",
        )

    def _move_to_error_dir(self, file_path: Path, error_dir: Path, error_dir_name: str) -> None:
        """Report ``file_path`` as invalid JSON and move it into ``error_dir``.

        A failed move is reported on stderr and does not stop the run.
        """
        self.stdout.write(self.style.WARNING(f"Invalid JSON in '{file_path.name}'. Moving to '{error_dir_name}'."))
        try:
            shutil.move(str(file_path), str(error_dir / file_path.name))
        except OSError as e:  # shutil.Error is an OSError subclass
            self.stderr.write(self.style.ERROR(f"Could not move file '{file_path.name}': {e}"))

    def handle(self, **options: str) -> None:
        """Handle the command.

        Args:
            **options: Parsed command options ("path" and "error_dir").

        Raises:
            CommandError: If the provided path is not a valid directory.
        """
        path = Path(options["path"])
        error_dir_name = options["error_dir"]

        if not path.is_dir():
            msg = f"Path '{path}' is not a valid directory."
            raise CommandError(msg)

        error_dir = path / error_dir_name
        # parents=True so a nested --error-dir such as "bad/json" works too.
        error_dir.mkdir(parents=True, exist_ok=True)

        self.stdout.write(f"Validating JSON files in '{path}'...")

        # Snapshot the listing before moving anything out of the directory being scanned.
        for file_path in sorted(path.glob("*.json")):
            if not file_path.is_file():
                continue
            try:
                with file_path.open("r", encoding="utf-8") as f:
                    json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Bytes that are not valid UTF-8 cannot be valid JSON either; treat both the same way.
                self._move_to_error_dir(file_path, error_dir, error_dir_name)
            except Exception as e:  # noqa: BLE001
                # Deliberate best-effort: one unreadable file must not abort the whole run.
                self.stderr.write(self.style.ERROR(f"An unexpected error occurred with file '{file_path.name}': {e}"))

        self.stdout.write(self.style.SUCCESS("Finished validating JSON files."))