Remove caching logic from better_import_drops command and related tests
Some checks are pending
Run Pytest / test (push) Waiting to run

This commit is contained in:
Joakim Hellsén 2026-02-21 17:04:31 +01:00
commit d90b75d39c
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
3 changed files with 11 additions and 185 deletions

View file

@ -19,7 +19,6 @@ from django.core.files.base import ContentFile
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.core.management.base import CommandParser
from django.db import DatabaseError
from json_repair import JSONReturnType
from pydantic import ValidationError
from tqdm import tqdm
@ -427,14 +426,6 @@ class Command(BaseCommand):
help = "Import Twitch drop campaign data from a JSON file or directory"
requires_migrations_checks = True
# In-memory caches prevent repeated DB lookups during batch imports,
# cutting query volume and keeping runtime predictable.
game_cache: dict[str, Game] = {}
organization_cache: dict[str, Organization] = {}
drop_campaign_cache: dict[str, DropCampaign] = {}
channel_cache: dict[str, Channel] = {}
benefit_cache: dict[str, DropBenefit] = {}
def add_arguments(self, parser: CommandParser) -> None:
"""Populate the command with arguments."""
parser.add_argument(
@ -468,52 +459,6 @@ class Command(BaseCommand):
),
)
def pre_fill_cache(self) -> None:
"""Load all existing IDs from DB into memory."""
self.game_cache = {}
self.organization_cache = {}
self.drop_campaign_cache = {}
self.channel_cache = {}
self.benefit_cache = {}
cache_operations: list[tuple[str, type, str]] = [
("Games", Game, "game_cache"),
("Organizations", Organization, "organization_cache"),
("Drop Campaigns", DropCampaign, "drop_campaign_cache"),
("Channels", Channel, "channel_cache"),
("Benefits", DropBenefit, "benefit_cache"),
]
try:
with tqdm(cache_operations, desc="Loading caches", unit="cache", colour="cyan") as progress_bar:
for name, model, cache_attr in progress_bar:
self.load_cache_for_model(progress_bar, name, model, cache_attr)
tqdm.write("")
except DatabaseError, OSError, RuntimeError, ValueError, TypeError:
# If cache loading fails completely, just use empty caches
tqdm.write(f"{Fore.YELLOW}{Style.RESET_ALL} Cache preload skipped (database error)\n")
def load_cache_for_model(self, progress_bar: tqdm, name: str, model: type, cache_attr: str) -> None:
"""Load cache for a specific model and attach to the command instance.
Args:
progress_bar: TQDM progress bar instance.
name: Human-readable name of the model being cached.
model: Django model class to query.
cache_attr: Attribute name on the command instance to store the cache.
"""
progress_bar.set_description(f"Loading {name}")
try:
cache: dict[str, Any] = {str(obj.twitch_id): obj for obj in model.objects.all()}
setattr(self, cache_attr, cache)
progress_bar.write(f" {Fore.GREEN}{Style.RESET_ALL} {name}: {len(cache):,}")
except (DatabaseError, OSError, RuntimeError, ValueError, TypeError) as e:
# Database error - skip this cache
msg: str = f" {Fore.YELLOW}{Style.RESET_ALL} {name}: Could not load ({type(e).__name__})"
progress_bar.write(msg)
setattr(self, cache_attr, {})
def _validate_responses(
self,
responses: list[dict[str, Any]],
@ -570,7 +515,7 @@ class Command(BaseCommand):
self,
org_data: OrganizationSchema,
) -> Organization:
"""Get or create an organization from cache or database.
"""Get or create an organization.
Args:
org_data: Organization data from Pydantic model.
@ -578,10 +523,6 @@ class Command(BaseCommand):
Returns:
Organization instance.
"""
# Prefer cache hits to avoid hitting the DB on every campaign item.
if org_data.twitch_id in self.organization_cache:
return self.organization_cache[org_data.twitch_id]
org_obj, created = Organization.objects.update_or_create(
twitch_id=org_data.twitch_id,
defaults={
@ -591,9 +532,6 @@ class Command(BaseCommand):
if created:
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new organization: {org_data.name}")
# Cache the organization for future lookups.
self.organization_cache[org_data.twitch_id] = org_obj
return org_obj
def _get_or_create_game(
@ -601,7 +539,7 @@ class Command(BaseCommand):
game_data: GameSchema,
campaign_org_obj: Organization | None,
) -> Game:
"""Get or create a game from cache or database, using correct owner organization.
"""Get or create a game using correct owner organization.
Args:
game_data: Game data from Pydantic model.
@ -621,35 +559,6 @@ class Command(BaseCommand):
if campaign_org_obj:
owner_orgs.add(campaign_org_obj)
if game_data.twitch_id in self.game_cache:
game_obj: Game = self.game_cache[game_data.twitch_id]
update_fields: list[str] = []
# Update owners (ManyToMany)
current_owners = set(game_obj.owners.all())
new_owners = owner_orgs - current_owners
if new_owners:
game_obj.owners.add(*new_owners)
# Persist normalized display name when provided
if game_data.display_name and game_obj.display_name != game_data.display_name:
game_obj.display_name = game_data.display_name
update_fields.append("display_name")
# Persist canonical name when provided (Inventory format)
if game_data.name and game_obj.name != game_data.name:
game_obj.name = game_data.name
update_fields.append("name")
# Persist slug when provided by API (Inventory and DropCampaignDetails)
if game_data.slug is not None and game_obj.slug != (game_data.slug or ""):
game_obj.slug = game_data.slug or ""
update_fields.append("slug")
# Persist box art URL when provided
if game_data.box_art_url is not None and game_obj.box_art != (game_data.box_art_url or ""):
game_obj.box_art = game_data.box_art_url or ""
update_fields.append("box_art")
if update_fields:
game_obj.save(update_fields=update_fields)
self._download_game_box_art(game_obj, game_data.box_art_url or game_obj.box_art)
return game_obj
game_obj, created = Game.objects.update_or_create(
twitch_id=game_data.twitch_id,
defaults={
@ -664,7 +573,6 @@ class Command(BaseCommand):
game_obj.owners.add(*owner_orgs)
if created:
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new game: {game_data.display_name}")
self.game_cache[game_data.twitch_id] = game_obj
self._download_game_box_art(game_obj, game_obj.box_art)
return game_obj
@ -694,7 +602,7 @@ class Command(BaseCommand):
game_obj.box_art_file.save(file_name, ContentFile(response.content), save=True)
def _get_or_create_channel(self, channel_info: ChannelInfoSchema) -> Channel:
"""Get or create a channel from cache or database.
"""Get or create a channel.
Args:
channel_info: Channel info from Pydantic model.
@ -702,10 +610,6 @@ class Command(BaseCommand):
Returns:
Channel instance.
"""
# Prefer cache hits to avoid hitting the DB on every campaign item.
if channel_info.twitch_id in self.channel_cache:
return self.channel_cache[channel_info.twitch_id]
# Use name as display_name fallback if displayName is None
display_name: str = channel_info.display_name or channel_info.name
@ -719,50 +623,9 @@ class Command(BaseCommand):
if created:
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new channel: {display_name}")
# Cache the channel for future lookups.
self.channel_cache[channel_info.twitch_id] = channel_obj
return channel_obj
def _should_skip_campaign_update(
self,
cached_obj: DropCampaign,
defaults: dict[str, Any],
game_obj: Game,
) -> bool:
"""Check if campaign update can be skipped based on cache comparison.
Args:
cached_obj: Cached campaign object.
defaults: New campaign data.
game_obj: Associated game object.
Returns:
True if no update needed, False otherwise.
"""
# Use game_id (Django's auto-generated FK field) to avoid
# triggering a query. Compare FK IDs to avoid ORM reads; keeps
# this a pure in-memory check.
cached_game_id: int | None = getattr(cached_obj, "game_id", None)
# Ensure game object has a primary key (should always be true
# at this point)
game_id: int | None = game_obj.pk
# Short-circuit updates when nothing changed; reduces write
# load and log noise while keeping caches accurate.
return bool(
cached_obj.name == defaults["name"]
and cached_obj.description == defaults["description"]
and getattr(cached_obj, "image_url", "") == defaults.get("image_url", "")
and cached_obj.start_at == defaults["start_at"]
and cached_obj.end_at == defaults["end_at"]
and cached_obj.details_url == defaults["details_url"]
and cached_obj.account_link_url == defaults["account_link_url"]
and cached_game_id == game_id,
)
def process_responses( # noqa: PLR0914
def process_responses(
self,
responses: list[dict[str, Any]],
file_path: Path,
@ -770,8 +633,6 @@ class Command(BaseCommand):
) -> tuple[bool, Path | None]:
"""Process, validate, and import campaign data from GraphQL responses.
With dependency resolution and caching.
Args:
responses: List of raw GraphQL response dictionaries to process.
file_path: Path to the file being processed.
@ -846,13 +707,6 @@ class Command(BaseCommand):
"account_link_url": drop_campaign.account_link_url,
}
if drop_campaign.twitch_id in self.drop_campaign_cache:
cached_obj: DropCampaign = self.drop_campaign_cache[drop_campaign.twitch_id]
if self._should_skip_campaign_update(cached_obj=cached_obj, defaults=defaults, game_obj=game_obj):
if options.get("verbose"):
tqdm.write(f"{Fore.YELLOW}{Style.RESET_ALL} Skipped (No changes): {drop_campaign.name}")
continue
campaign_obj, created = DropCampaign.objects.update_or_create(
twitch_id=drop_campaign.twitch_id,
defaults=defaults,
@ -860,8 +714,6 @@ class Command(BaseCommand):
if created:
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created new campaign: {drop_campaign.name}")
self.drop_campaign_cache[drop_campaign.twitch_id] = campaign_obj
action: Literal["Imported new", "Updated"] = "Imported new" if created else "Updated"
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} {action} campaign: {drop_campaign.name}")
@ -936,7 +788,7 @@ class Command(BaseCommand):
)
def _get_or_update_benefit(self, benefit_schema: DropBenefitSchema) -> DropBenefit:
"""Return a DropBenefit, updating stale cached values when needed."""
"""Return a DropBenefit, creating or updating as needed."""
distribution_type: str = (benefit_schema.distribution_type or "").strip()
benefit_defaults: dict[str, str | int | datetime | bool | None] = {
"name": benefit_schema.name,
@ -951,28 +803,13 @@ class Command(BaseCommand):
if created_at_dt:
benefit_defaults["created_at"] = created_at_dt
cached_benefit: DropBenefit | None = self.benefit_cache.get(benefit_schema.twitch_id)
benefit_obj, created = DropBenefit.objects.update_or_create(
twitch_id=benefit_schema.twitch_id,
defaults=benefit_defaults,
)
if created:
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created DropBenefit: {benefit_schema.name}")
if cached_benefit:
update_fields: list[str] = []
for field_name, value in benefit_defaults.items():
if getattr(cached_benefit, field_name) != value:
setattr(cached_benefit, field_name, value)
update_fields.append(field_name)
if update_fields:
cached_benefit.save(update_fields=update_fields)
benefit_obj: DropBenefit = cached_benefit
else:
benefit_obj, created = DropBenefit.objects.update_or_create(
twitch_id=benefit_schema.twitch_id,
defaults=benefit_defaults,
)
if created:
tqdm.write(f"{Fore.GREEN}{Style.RESET_ALL} Created DropBenefit: {benefit_schema.name}")
self.benefit_cache[benefit_schema.twitch_id] = benefit_obj
return benefit_obj
def _process_benefit_edges(
@ -1110,8 +947,6 @@ class Command(BaseCommand):
input_path: Path = Path(options["path"]).resolve()
self.pre_fill_cache()
try:
if input_path.is_file():
self.process_file(file_path=input_path, options=options)

View file

@ -49,7 +49,6 @@ class ExtractCampaignsTests(TestCase):
def test_validates_top_level_response_with_nested_campaign(self) -> None:
"""Ensure validation handles full responses correctly."""
command = Command()
command.pre_fill_cache()
payload: dict[str, object] = {
"data": {
@ -102,7 +101,6 @@ class ExtractCampaignsTests(TestCase):
def test_imports_inventory_response_and_sets_operation_name(self) -> None:
"""Ensure Inventory JSON imports work and operation_name is set correctly."""
command = Command()
command.pre_fill_cache()
# Inventory response with dropCampaignsInProgress
payload: dict[str, object] = {
@ -172,7 +170,6 @@ class ExtractCampaignsTests(TestCase):
def test_handles_inventory_with_null_campaigns(self) -> None:
"""Ensure Inventory JSON with null dropCampaignsInProgress is handled correctly."""
command = Command()
command.pre_fill_cache()
# Inventory response with null dropCampaignsInProgress
payload: dict[str, object] = {
@ -207,7 +204,6 @@ class ExtractCampaignsTests(TestCase):
def test_handles_inventory_with_allow_acl_url_and_missing_is_enabled(self) -> None:
"""Ensure ACL with url field and missing isEnabled is handled correctly."""
command = Command()
command.pre_fill_cache()
# Inventory response with allow ACL containing url field and no isEnabled
payload: dict[str, object] = {
@ -375,7 +371,6 @@ class OperationNameFilteringTests(TestCase):
def test_can_filter_campaigns_by_operation_name(self) -> None:
"""Ensure campaigns can be filtered by operation_name to separate data sources."""
command = Command()
command.pre_fill_cache()
# Import a ViewerDropsDashboard campaign
viewer_drops_payload = {
@ -491,7 +486,6 @@ class GameImportTests(TestCase):
def test_imports_game_slug_from_campaign(self) -> None:
"""Ensure Game.slug is imported from DropCampaign game data when provided."""
command = Command()
command.pre_fill_cache()
payload: dict[str, object] = {
"data": {
@ -549,7 +543,6 @@ class ExampleJsonImportTests(TestCase):
def test_imports_drop_campaign_details_and_persists_urls(self) -> None:
"""Ensure `imageURL` and other URL-ish fields are persisted from DropCampaignDetails."""
command = Command()
command.pre_fill_cache()
repo_root: Path = Path(__file__).resolve().parents[2]
example_path: Path = repo_root / "example.json"
@ -637,7 +630,6 @@ class ImporterRobustnessTests(TestCase):
def test_allows_null_image_url_and_persists_empty_string(self) -> None:
"""Ensure null imageURL doesn't fail validation and results in empty string in DB."""
command = Command()
command.pre_fill_cache()
payload: dict[str, object] = {
"data": {

View file

@ -15,7 +15,6 @@ class GameOwnerOrganizationTests(TestCase):
def test_game_owner_organization_precedence(self) -> None:
"""If both owner and ownerOrganization are present, game owner should be ownerOrganization."""
command = Command()
command.pre_fill_cache()
payload = {
"data": {