Use one thread for each domain when grabbing entries
This commit is contained in:
parent
d04fe12f80
commit
c3ebd9faa2
4 changed files with 121 additions and 28 deletions
|
|
@ -1,13 +1,41 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from datetime import timedelta
|
||||
from threading import Thread
|
||||
|
||||
from django.core.management.base import BaseCommand, no_translations
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress
|
||||
|
||||
from feedvault.feeds import grab_entries
|
||||
from feedvault.models import Entry, Feed
|
||||
from feedvault.models import Feed
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
class DomainUpdater(Thread):
    """Worker thread that grabs entries for a list of feeds, one feed after another.

    Several workers report to the same Rich ``Progress`` instance, so no single
    worker may own the display's lifetime.
    """

    def __init__(self, feeds: list[Feed], progress: Progress, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        """Update feeds in a separate thread.

        Args:
            feeds: The feeds to update.
            progress: The Rich progress bar.
            *args: Arbitrary positional arguments.
            **kwargs: Arbitrary keyword arguments.
        """
        super().__init__(*args, **kwargs)
        self.feeds: list[Feed] = feeds
        self.progress: Progress = progress

    def run(self) -> None:
        """Grab entries for every feed and advance this worker's progress task.

        The progress object is deliberately not used as a context manager here:
        leaving a ``with`` block stops the live display, so the first worker to
        finish would switch it off for every worker still running. The display
        is stopped only once no unfinished task remains.
        """
        progress: Progress = self.progress
        task = progress.add_task("[cyan]Updating feeds...", total=len(self.feeds))
        completed_all = False
        try:
            for feed in self.feeds:
                # start() does nothing while the display is already live; calling it
                # per feed re-opens the display if another worker stopped it in the
                # short window before this worker's task was registered.
                progress.start()
                grab_entries(feed)
                progress.update(task, advance=1, description=f"[green]Updated {feed.feed_url}")
            # A task with no feeds never receives an update and so never counts as finished.
            completed_all = bool(self.feeds)
        finally:
            if not completed_all:
                # Drop a task that cannot complete (empty list or an error) so it
                # does not keep the shared display open forever.
                progress.remove_task(task)
            if progress.finished:
                progress.stop()
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
|
@ -16,22 +44,23 @@ class Command(BaseCommand):
|
|||
|
||||
@no_translations
|
||||
def handle(self, *args, **options) -> None: # noqa: ANN002, ANN003, ARG002
|
||||
new_entries: int = 0
|
||||
|
||||
# Grab feeds that haven't been checked in 15 minutes OR haven't been checked at all
|
||||
for feed in Feed.objects.filter(
|
||||
feeds = Feed.objects.filter(
|
||||
Q(last_checked__lte=timezone.now() - timedelta(minutes=15)) | Q(last_checked__isnull=True),
|
||||
):
|
||||
entries: None | list[Entry] = grab_entries(feed)
|
||||
if not entries:
|
||||
self.stdout.write(f"No new entries for {feed.title}")
|
||||
continue
|
||||
)
|
||||
domain_feeds = defaultdict(list)
|
||||
|
||||
self.stdout.write(f"Updated {feed}")
|
||||
self.stdout.write(f"Added {len(entries)} new entries for {feed}")
|
||||
new_entries += len(entries)
|
||||
for feed in feeds:
|
||||
domain_feeds[feed.domain.pk].append(feed)
|
||||
|
||||
if new_entries:
|
||||
self.stdout.write(self.style.SUCCESS(f"Successfully updated feeds. Added {new_entries} new entries"))
|
||||
threads = []
|
||||
progress = Progress()
|
||||
|
||||
self.stdout.write("No new entries found")
|
||||
for feeds in domain_feeds.values():
|
||||
thread = DomainUpdater(feeds, progress)
|
||||
threads.append(thread)
|
||||
thread.start()
|
||||
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
console.log("[bold green]Successfully updated feeds")
|
||||
|
|
|
|||
|
|
@ -46,11 +46,6 @@ WSGI_APPLICATION = "feedvault.wsgi.application"
|
|||
NINJA_PAGINATION_PER_PAGE = 1000
|
||||
STATIC_URL = "static/"
|
||||
STATIC_ROOT: Path = BASE_DIR / "staticfiles"
|
||||
STATICFILES_STORAGE = (
|
||||
"django.contrib.staticfiles.storage.StaticFilesStorage"
|
||||
if TESTING
|
||||
else "whitenoise.storage.CompressedManifestStaticFilesStorage"
|
||||
)
|
||||
STATIC_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
MEDIA_URL = "media/"
|
||||
MEDIA_ROOT: Path = BASE_DIR / "media"
|
||||
|
|
@ -87,11 +82,12 @@ MIDDLEWARE: list[str] = [
|
|||
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases
|
||||
database_folder: Path = BASE_DIR / "data"
|
||||
database_folder.mkdir(parents=True, exist_ok=True)
|
||||
DATABASES: dict[str, dict[str, str | Path | bool]] = {
|
||||
DATABASES: dict[str, dict[str, str | Path | bool | int]] = {
|
||||
"default": {
|
||||
"ENGINE": "django.db.backends.sqlite3",
|
||||
"NAME": database_folder / "feedvault.sqlite3",
|
||||
"ATOMIC_REQUESTS": True,
|
||||
"timeout": 30,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -184,6 +180,8 @@ STORAGES: dict[str, dict[str, str]] = {
|
|||
"BACKEND": "django.core.files.storage.FileSystemStorage",
|
||||
},
|
||||
"staticfiles": {
|
||||
"BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage",
|
||||
"BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage"
|
||||
if TESTING
|
||||
else "whitenoise.storage.CompressedManifestStaticFilesStorage",
|
||||
},
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue