Make Owner optional; use dateparser to parse dates; use json-repair to read JSON

This commit is contained in:
Joakim Hellsén 2025-09-01 21:50:38 +02:00
commit 6d5f014134
12 changed files with 858 additions and 453 deletions

View file

@ -1,18 +1,16 @@
from __future__ import annotations
import json
import logging
import re
import shutil
import traceback
from pathlib import Path
from typing import TYPE_CHECKING, Any
import orjson
import dateparser
import json_repair
from django.core.management.base import BaseCommand, CommandError, CommandParser
from django.db import transaction
from django.utils import timezone
from django.utils.dateparse import parse_datetime
from twitch.models import DropBenefit, DropBenefitEdge, DropCampaign, Game, Organization, TimeBasedDrop
@ -23,6 +21,30 @@ if TYPE_CHECKING:
logger: logging.Logger = logging.getLogger(__name__)
def parse_date(value: str | None) -> datetime | None:
"""Parse a datetime string into a timezone-aware datetime using dateparser.
Args:
value: The datetime string to parse.
Returns:
A timezone-aware datetime object or None if parsing fails.
"""
value = (value or "").strip()
if not value or value == "None":
return None
dt: datetime | None = dateparser.parse(value, settings={"RETURN_AS_TIMEZONE_AWARE": True})
if not dt:
return None
# Ensure aware in Django's current timezone
if timezone.is_naive(dt):
dt = timezone.make_aware(dt, timezone.get_current_timezone())
return dt
class Command(BaseCommand):
"""Import Twitch drop campaign data from a JSON file or directory of JSON files."""
@ -96,19 +118,6 @@ class Command(BaseCommand):
self._process_file(json_file, processed_path)
except CommandError as e:
self.stdout.write(self.style.ERROR(f"Error processing {json_file}: {e}"))
except (orjson.JSONDecodeError, json.JSONDecodeError):
# Attempt to clean trailing broken JSON and retry parsing
try:
self.clean_file(json_file)
self.stdout.write(self.style.SUCCESS(f"Cleaned JSON in '{json_file.name}', retrying import."))
# re-process the cleaned file
self._process_file(json_file, processed_path)
except (orjson.JSONDecodeError, json.JSONDecodeError):
# Still invalid after cleanup, move to broken_json
broken_json_dir: Path = processed_path / "broken_json"
broken_json_dir.mkdir(parents=True, exist_ok=True)
self.stdout.write(self.style.WARNING(f"Invalid JSON in '{json_file}', even after cleanup. Moving to '{broken_json_dir}'."))
self.move_file(json_file, broken_json_dir / json_file.name)
except (ValueError, TypeError, AttributeError, KeyError, IndexError):
self.stdout.write(self.style.ERROR(f"Data error processing {json_file}"))
self.stdout.write(self.style.ERROR(traceback.format_exc()))
@ -119,6 +128,9 @@ class Command(BaseCommand):
def _process_file(self, file_path: Path, processed_path: Path) -> None:
"""Process a single JSON file.
Raises:
CommandError: If the file isn't a JSON file or has an invalid JSON structure.
Args:
file_path: Path to the JSON file.
processed_path: Subdirectory to move processed files to.
@ -126,7 +138,7 @@ class Command(BaseCommand):
raw_bytes: bytes = file_path.read_bytes()
raw_text: str = raw_bytes.decode("utf-8")
data = orjson.loads(raw_bytes)
data = json_repair.loads(raw_text)
broken_dir: Path = processed_path / "broken"
broken_dir.mkdir(parents=True, exist_ok=True)
@ -222,8 +234,11 @@ class Command(BaseCommand):
if isinstance(data, list):
for _item in data:
self.import_drop_campaign(_item, file_path=file_path)
else:
elif isinstance(data, dict):
self.import_drop_campaign(data, file_path=file_path)
else:
msg: str = f"Invalid JSON structure in {file_path}: Expected dict or list at top level"
raise CommandError(msg)
self.move_file(file_path, processed_path)
@ -341,71 +356,52 @@ class Command(BaseCommand):
"""
with transaction.atomic():
game: Game = self.game_update_or_create(campaign_data=campaign_data)
organization: Organization | None = self.owner_update_or_create(campaign_data=campaign_data)
organization: Organization | None = self.owner_update_or_create(campaign_data=campaign_data, file_path=file_path)
if organization is None:
self.stdout.write(self.style.WARNING("No organization found for this campaign, skipping drop campaign import."))
return
if organization:
game.owner = organization
game.save(update_fields=["owner"])
drop_campaign: DropCampaign = self.drop_campaign_update_or_get(
campaign_data=campaign_data,
game=game,
organization=organization,
)
drop_campaign: DropCampaign = self.drop_campaign_update_or_get(campaign_data=campaign_data, game=game)
for drop_data in campaign_data.get("timeBasedDrops", []):
time_based_drop: TimeBasedDrop = self.create_time_based_drop(drop_campaign=drop_campaign, drop_data=drop_data)
self._process_time_based_drop(drop_data, drop_campaign, file_path)
benefit_edges: list[dict[str, Any]] = drop_data.get("benefitEdges", [])
if not benefit_edges:
self.stdout.write(self.style.WARNING(f"No benefit edges found for drop {time_based_drop.name} (ID: {time_based_drop.id})"))
self.move_file(file_path, Path("no_benefit_edges") / file_path.name)
continue
for benefit_edge in benefit_edges:
benefit_defaults: dict[str, Any] = {}
benefit_data: dict[str, Any] = benefit_edge["benefit"]
benefit_name: str = str(benefit_data.get("name")).strip()
if benefit_name and benefit_name != "None":
benefit_defaults["name"] = benefit_name
img_asset: str = str(benefit_data.get("imageAssetURL")).strip()
if img_asset and img_asset != "None":
benefit_defaults["image_asset_url"] = img_asset
created_at: str = str(benefit_data.get("createdAt")).strip()
if created_at and created_at != "None":
benefit_defaults["created_at"] = created_at
ent_limit: int | None = benefit_data.get("entitlementLimit")
if ent_limit is not None:
benefit_defaults["entitlement_limit"] = ent_limit
ios_avail: bool | None = benefit_data.get("isIosAvailable")
if ios_avail is not None:
benefit_defaults["is_ios_available"] = ios_avail
dist_type: str | None = benefit_data.get("distributionType")
if dist_type is not None:
benefit_defaults["distribution_type"] = dist_type
benefit_defaults["game"] = game
benefit_defaults["owner_organization"] = organization
benefit, _ = DropBenefit.objects.update_or_create(
id=benefit_data["id"],
defaults=benefit_defaults,
)
DropBenefitEdge.objects.update_or_create(
drop=time_based_drop,
benefit=benefit,
defaults={
"entitlement_limit": benefit_edge.get("entitlementLimit", 1),
},
)
self.stdout.write(self.style.SUCCESS(f"Successfully imported drop campaign {drop_campaign.name} (ID: {drop_campaign.id})"))
def _process_time_based_drop(self, drop_data: dict[str, Any], drop_campaign: DropCampaign, file_path: Path) -> None:
    """Import a single time-based drop together with its benefits.

    The drop itself is created/updated through ``create_time_based_drop``.
    Each entry in the drop's ``benefitEdges`` list is then stored as a
    ``DropBenefit`` (keyed by the benefit's ``id``) and linked to the drop
    through a ``DropBenefitEdge``.

    If the drop has no benefit edges, a warning is written, the file is moved
    to ``no_benefit_edges/<file name>`` and nothing else is imported for it.

    Args:
        drop_data: The time-based drop data to import.
        drop_campaign: The campaign the drop belongs to.
        file_path: Path to the file being processed.

    Raises:
        KeyError: If a benefit edge has no ``benefit`` key or the benefit has no ``id``.
    """
    time_based_drop: TimeBasedDrop = self.create_time_based_drop(drop_campaign=drop_campaign, drop_data=drop_data)

    edges: list[dict[str, Any]] = drop_data.get("benefitEdges", [])
    if not edges:
        self.stdout.write(self.style.WARNING(f"No benefit edges found for drop {time_based_drop.name} (ID: {time_based_drop.id})"))
        self.move_file(file_path, Path("no_benefit_edges") / file_path.name)
        return

    for edge in edges:
        benefit_data: dict[str, Any] = edge["benefit"]

        # (model field, imported value) pairs; evaluated in this order.
        candidates: tuple[tuple[str, Any], ...] = (
            ("name", benefit_data.get("name")),
            ("image_asset_url", benefit_data.get("imageAssetURL")),
            ("created_at", parse_date(benefit_data.get("createdAt"))),
            ("entitlement_limit", benefit_data.get("entitlementLimit")),
            ("is_ios_available", benefit_data.get("isIosAvailable")),
            ("distribution_type", benefit_data.get("distributionType")),
        )
        # Missing values are left out so they never overwrite stored data with None.
        benefit_defaults: dict[str, Any] = {field: value for field, value in candidates if value is not None}

        benefit, _ = DropBenefit.objects.update_or_create(id=benefit_data["id"], defaults=benefit_defaults)

        # The edge carries its own entitlement limit; fall back to 1 when absent.
        edge_defaults: dict[str, Any] = {"entitlement_limit": edge.get("entitlementLimit", 1)}
        DropBenefitEdge.objects.update_or_create(drop=time_based_drop, benefit=benefit, defaults=edge_defaults)
def create_time_based_drop(self, drop_campaign: DropCampaign, drop_data: dict[str, Any]) -> TimeBasedDrop:
"""Creates or updates a TimeBasedDrop instance based on the provided drop data.
@ -423,49 +419,18 @@ class Command(BaseCommand):
TimeBasedDrop: The created or updated TimeBasedDrop instance.
"""
defaults: dict[str, Any] = {}
time_based_drop_defaults: dict[str, Any] = {
"campaign": drop_campaign,
"name": drop_data.get("name"),
"required_minutes_watched": drop_data.get("requiredMinutesWatched"),
"required_subs": drop_data.get("requiredSubs"),
"start_at": parse_date(drop_data.get("startAt")),
"end_at": parse_date(drop_data.get("endAt")),
}
# Filter out None values to avoid overwriting with them
time_based_drop_defaults = {k: v for k, v in time_based_drop_defaults.items() if v is not None}
name: str = drop_data.get("name", "")
if name:
defaults["name"] = name.strip()
# "requiredMinutesWatched": 240
required_minutes_watched: int = drop_data.get("requiredMinutesWatched", 0)
if required_minutes_watched:
defaults["required_minutes_watched"] = int(required_minutes_watched)
# "requiredSubs": 1,
required_subs: int = drop_data.get("requiredSubs", 0)
if required_subs:
defaults["required_subs"] = int(required_subs)
# "startAt": "2025-08-08T07:00:00Z",
# Model field is DateTimeField
start_at: str | None = drop_data.get("startAt")
if start_at:
# Convert to timezone-aware datetime
parsed_start_at: datetime | None = parse_datetime(start_at)
if parsed_start_at and timezone.is_naive(parsed_start_at):
parsed_start_at = timezone.make_aware(parsed_start_at)
if parsed_start_at:
defaults["start_at"] = parsed_start_at
# "endAt": "2025-02-04T10:59:59.999Z",
# Model field is DateTimeField
end_at: str | None = drop_data.get("endAt")
if end_at:
# Convert to timezone-aware datetime
parsed_end_at: datetime | None = parse_datetime(end_at)
if parsed_end_at and timezone.is_naive(parsed_end_at):
parsed_end_at = timezone.make_aware(parsed_end_at)
if parsed_end_at:
defaults["end_at"] = parsed_end_at
defaults["campaign"] = drop_campaign
time_based_drop, created = TimeBasedDrop.objects.update_or_create(id=drop_data["id"], defaults=defaults)
time_based_drop, created = TimeBasedDrop.objects.update_or_create(id=drop_data["id"], defaults=time_based_drop_defaults)
if created:
self.stdout.write(self.style.SUCCESS(f"Successfully imported time-based drop {time_based_drop.name} (ID: {time_based_drop.id})"))
@ -475,7 +440,6 @@ class Command(BaseCommand):
self,
campaign_data: dict[str, Any],
game: Game,
organization: Organization | None,
) -> DropCampaign:
"""Update or create a drop campaign.
@ -487,51 +451,33 @@ class Command(BaseCommand):
Returns:
Returns the DropCampaign object.
"""
defaults: dict[str, Any] = {}
name = campaign_data.get("name")
if name is not None:
defaults["name"] = name
desc = campaign_data.get("description")
if desc is not None:
defaults["description"] = desc.replace("\\n", "\n")
details = campaign_data.get("detailsURL")
if details is not None:
defaults["details_url"] = details
acct_link = campaign_data.get("accountLinkURL")
if acct_link is not None:
defaults["account_link_url"] = acct_link
img = campaign_data.get("imageURL")
if img is not None:
defaults["image_url"] = img
start = campaign_data.get("startAt")
if start is not None:
defaults["start_at"] = start
end = campaign_data.get("endAt")
if end is not None:
defaults["end_at"] = end
is_conn = campaign_data.get("self", {}).get("isAccountConnected")
if is_conn is not None:
defaults["is_account_connected"] = is_conn
defaults["game"] = game
if organization:
defaults["owner"] = organization
drop_campaign_defaults: dict[str, Any] = {
"game": game,
"name": campaign_data.get("name"),
"description": campaign_data.get("description"),
"details_url": campaign_data.get("detailsURL"),
"account_link_url": campaign_data.get("accountLinkURL"),
"image_url": campaign_data.get("imageURL"),
"start_at": parse_date(campaign_data.get("startAt") or campaign_data.get("startsAt")),
"end_at": parse_date(campaign_data.get("endAt") or campaign_data.get("endsAt")),
"is_account_connected": campaign_data.get("self", {}).get("isAccountConnected"),
}
# Filter out None values to avoid overwriting with them
drop_campaign_defaults = {k: v for k, v in drop_campaign_defaults.items() if v is not None}
drop_campaign, created = DropCampaign.objects.update_or_create(
id=campaign_data["id"],
defaults=defaults,
defaults=drop_campaign_defaults,
)
if created:
self.stdout.write(self.style.SUCCESS(f"Created new drop campaign: {drop_campaign.name} (ID: {drop_campaign.id})"))
return drop_campaign
def owner_update_or_create(self, campaign_data: dict[str, Any], file_path: Path) -> Organization | None:
def owner_update_or_create(self, campaign_data: dict[str, Any]) -> Organization | None:
"""Update or create an organization.
Args:
campaign_data: The drop campaign data to import.
file_path: Optional path to the file being processed, used for error handling.
Returns:
Returns the Organization object.
@ -540,37 +486,20 @@ class Command(BaseCommand):
if not org_data:
self.stdout.write(self.style.WARNING("No owner data found in campaign data. Attempting to find organization by game."))
# Try to find an organization by the game if possible
game_id: str | None = campaign_data.get("game", {}).get("id")
if game_id:
game: Game | None = Game.objects.filter(id=game_id).first()
if game:
if game.organizations.exists():
org: Organization | None = game.organizations.first()
if org:
self.stdout.write(self.style.SUCCESS(f"Found organization '{org.name}' for game '{game.display_name}'"))
return org
else:
self.stdout.write(self.style.WARNING(f"No game found with id '{game_id}' when looking up organization."))
organization: Organization | None = None
if org_data:
org_defaults: dict[str, Any] = {"name": org_data.get("name")}
# Filter out None values to avoid overwriting with them
org_defaults = {k: v for k, v in org_defaults.items() if v is not None}
# If not found, move the file for manual review
self.stdout.write(self.style.WARNING("No organization found for this campaign, moving file for review."))
todo_dir: Path = Path("check_these_please")
todo_dir.mkdir(parents=True, exist_ok=True)
self.move_file(
file_path,
todo_dir / file_path.name,
organization, created = Organization.objects.update_or_create(
id=org_data["id"],
defaults=org_defaults,
)
return None
organization, created = Organization.objects.update_or_create(
id=org_data["id"],
defaults={"name": org_data["name"]},
)
if created:
self.stdout.write(self.style.SUCCESS(f"Created new organization: {organization.name} (ID: {organization.id})"))
return organization
if created:
self.stdout.write(self.style.SUCCESS(f"Created new organization: {organization.name} (ID: {organization.id})"))
return organization
return None
def game_update_or_create(self, campaign_data: dict[str, Any]) -> Game:
"""Update or create a game.
@ -628,21 +557,3 @@ class Command(BaseCommand):
if changed_fields:
obj.save(update_fields=changed_fields)
return obj, created
def clean_file(self, path: Path) -> None:
    """Rewrite the file with a trailing ``"extensions"`` block stripped off.

    Two rules are tried in order, and the second is only used when the first
    leaves the text untouched:

    1. an ``"extensions"`` object closing the last element of a JSON array,
       replaced by ``}]``;
    2. an ``"extensions"`` object at the very end of the text, replaced by ``}``.

    The file is always written back as UTF-8, even when neither rule matched.

    Args:
        path: Path to the JSON file to clean in place.
    """
    text: str = path.read_text(encoding="utf-8")

    # (pattern, replacement) pairs in priority order.
    rules: tuple[tuple[str, str], ...] = (
        (r'(?s),?\s*"extensions"\s*:\s*\{.*?\}\s*\}\s*\]\s*$', "}]"),
        (r'(?s),?\s*"extensions"\s*:\s*\{.*?\}\s*$', "}"),
    )

    cleaned: str = text
    for pattern, closer in rules:
        cleaned = re.sub(pattern, closer, text)
        if cleaned != text:
            # This rule changed something; lower-priority rules are not applied.
            break

    path.write_text(cleaned, encoding="utf-8")