Use one thread for each domain when grabbing entries

This commit is contained in:
Joakim Hellsén 2024-03-17 23:36:56 +01:00
commit c3ebd9faa2
No known key found for this signature in database
GPG key ID: D196AE66FEBE1DC9
4 changed files with 121 additions and 28 deletions

View file

@ -1,13 +1,41 @@
from __future__ import annotations

from collections import defaultdict
from datetime import timedelta
from threading import Lock, Thread

from django.core.management.base import BaseCommand, no_translations
from django.db.models import Q
from django.utils import timezone
from rich.console import Console
from rich.progress import Progress

from feedvault.feeds import grab_entries
from feedvault.models import Entry, Feed
from feedvault.models import Feed
# Module-level Rich console; used for log output by this command.
console = Console()
class DomainUpdater(Thread):
    """Thread that updates a list of feeds one after another.

    Several updaters are expected to share a single Rich ``Progress``
    instance. Entering ``with progress:`` in every thread would stop the live
    display as soon as the *first* thread finished, while the others were still
    working. Instead, the updaters count how many of them are currently using
    a given ``Progress``: the first one starts the display and the last one
    stops it.
    """

    # Guards _display_users; shared by every DomainUpdater instance.
    _display_lock = Lock()
    # Number of running updaters per Progress instance, keyed by id(progress).
    # The key stays valid while the count is non-zero because each running
    # updater holds a reference to its Progress.
    _display_users: dict[int, int] = {}

    def __init__(self, feeds: list[Feed], progress: Progress, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        """Update feeds in a separate thread.

        Args:
            feeds: The feeds to update.
            progress: The Rich progress bar.
            *args: Arbitrary positional arguments.
            **kwargs: Arbitrary keyword arguments.
        """
        super().__init__(*args, **kwargs)
        self.feeds: list[Feed] = feeds
        self.progress: Progress = progress

    def run(self) -> None:
        """Grab entries for every feed, advancing this thread's progress task."""
        progress = self.progress
        self._acquire_display()
        try:
            task = progress.add_task("[cyan]Updating feeds...", total=len(self.feeds))
            for feed in self.feeds:
                grab_entries(feed)
                progress.update(task, advance=1, description=f"[green]Updated {feed.feed_url}")
        finally:
            # Always give the display back, even if grabbing a feed raised.
            self._release_display()

    def _acquire_display(self) -> None:
        """Register this thread as a user of the progress display, starting it if it is the first."""
        key = id(self.progress)
        with self._display_lock:
            users = self._display_users.get(key, 0)
            self._display_users[key] = users + 1
            if users == 0:
                self.progress.start()

    def _release_display(self) -> None:
        """Unregister this thread from the progress display, stopping it if it was the last user."""
        key = id(self.progress)
        with self._display_lock:
            remaining = self._display_users.get(key, 0) - 1
            if remaining > 0:
                self._display_users[key] = remaining
                return
            self._display_users.pop(key, None)
            self.progress.stop()
class Command(BaseCommand):
@ -16,22 +44,23 @@ class Command(BaseCommand):
@no_translations
def handle(self, *args, **options) -> None: # noqa: ANN002, ANN003, ARG002
new_entries: int = 0
# Grab feeds that haven't been checked in 15 minutes OR haven't been checked at all
for feed in Feed.objects.filter(
feeds = Feed.objects.filter(
Q(last_checked__lte=timezone.now() - timedelta(minutes=15)) | Q(last_checked__isnull=True),
):
entries: None | list[Entry] = grab_entries(feed)
if not entries:
self.stdout.write(f"No new entries for {feed.title}")
continue
)
domain_feeds = defaultdict(list)
self.stdout.write(f"Updated {feed}")
self.stdout.write(f"Added {len(entries)} new entries for {feed}")
new_entries += len(entries)
for feed in feeds:
domain_feeds[feed.domain.pk].append(feed)
if new_entries:
self.stdout.write(self.style.SUCCESS(f"Successfully updated feeds. Added {new_entries} new entries"))
threads = []
progress = Progress()
self.stdout.write("No new entries found")
for feeds in domain_feeds.values():
thread = DomainUpdater(feeds, progress)
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
console.log("[bold green]Successfully updated feeds")

View file

@ -46,11 +46,6 @@ WSGI_APPLICATION = "feedvault.wsgi.application"
NINJA_PAGINATION_PER_PAGE = 1000
STATIC_URL = "static/"
STATIC_ROOT: Path = BASE_DIR / "staticfiles"
STATICFILES_STORAGE = (
"django.contrib.staticfiles.storage.StaticFilesStorage"
if TESTING
else "whitenoise.storage.CompressedManifestStaticFilesStorage"
)
STATIC_ROOT.mkdir(parents=True, exist_ok=True)
MEDIA_URL = "media/"
MEDIA_ROOT: Path = BASE_DIR / "media"
@ -87,11 +82,12 @@ MIDDLEWARE: list[str] = [
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases
database_folder: Path = BASE_DIR / "data"
database_folder.mkdir(parents=True, exist_ok=True)
DATABASES: dict[str, dict[str, str | Path | bool]] = {
DATABASES: dict[str, dict[str, str | Path | bool | int]] = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": database_folder / "feedvault.sqlite3",
"ATOMIC_REQUESTS": True,
"timeout": 30,
},
}
@ -184,6 +180,8 @@ STORAGES: dict[str, dict[str, str]] = {
"BACKEND": "django.core.files.storage.FileSystemStorage",
},
"staticfiles": {
"BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage",
"BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage"
if TESTING
else "whitenoise.storage.CompressedManifestStaticFilesStorage",
},
}