Use one thread for each domain when grabbing entries

This commit is contained in:
Joakim Hellsén 2024-03-17 23:36:56 +01:00
commit c3ebd9faa2
No known key found for this signature in database
GPG key ID: D196AE66FEBE1DC9
4 changed files with 121 additions and 28 deletions

View file

@ -1,13 +1,41 @@
from __future__ import annotations

from collections import defaultdict
from datetime import timedelta
from threading import Lock, Thread
from typing import ClassVar

from django.core.management.base import BaseCommand, no_translations
from django.db.models import Q
from django.utils import timezone
from rich.console import Console
from rich.progress import Progress

from feedvault.feeds import grab_entries
from feedvault.models import Entry, Feed
from feedvault.models import Feed
console = Console()
class DomainUpdater(Thread):
    """Thread that updates a list of feeds sequentially.

    Several updaters may be handed the same Rich ``Progress`` object. The
    display is started by the first updater that begins running and stopped
    only once the last running updater has finished, so one thread finishing
    early does not stop the display while other threads are still reporting
    to it.
    """

    # Guards _active_users; shared by every DomainUpdater instance.
    _users_lock: ClassVar[Lock] = Lock()
    # Number of currently running updaters per Progress, keyed by id(progress).
    # An entry only exists while at least one updater is using that Progress.
    _active_users: ClassVar[dict[int, int]] = {}

    def __init__(self, feeds: list[Feed], progress: Progress, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        """Update feeds in a separate thread.

        Args:
            feeds: The feeds to update.
            progress: The Rich progress bar.
            *args: Arbitrary positional arguments.
            **kwargs: Arbitrary keyword arguments.
        """
        super().__init__(*args, **kwargs)
        self.feeds: list[Feed] = feeds
        self.progress: Progress = progress

    def run(self) -> None:
        """Grab entries for every feed and advance this thread's progress task.

        An exception raised by ``grab_entries`` still propagates and ends the
        thread; the ``finally`` clause only makes sure this updater's claim on
        the progress display is released.
        """
        self._acquire_progress()
        try:
            task = self.progress.add_task("[cyan]Updating feeds...", total=len(self.feeds))
            for feed in self.feeds:
                grab_entries(feed)
                self.progress.update(task, advance=1, description=f"[green]Updated {feed.feed_url}")
        finally:
            self._release_progress()

    def _acquire_progress(self) -> None:
        """Register this updater as a user of the display, starting it if it is the first."""
        key = id(self.progress)
        with self._users_lock:
            users = self._active_users.get(key, 0)
            if users == 0:
                self.progress.start()
            # Count the user only after start() succeeded so a failed start
            # does not leave a stale entry behind.
            self._active_users[key] = users + 1

    def _release_progress(self) -> None:
        """Unregister this updater, stopping the display if it was the last user."""
        key = id(self.progress)
        with self._users_lock:
            remaining = self._active_users[key] - 1
            if remaining:
                self._active_users[key] = remaining
            else:
                del self._active_users[key]
                self.progress.stop()
class Command(BaseCommand):
@ -16,22 +44,23 @@ class Command(BaseCommand):
@no_translations
def handle(self, *args, **options) -> None: # noqa: ANN002, ANN003, ARG002
new_entries: int = 0
# Grab feeds that haven't been checked in 15 minutes OR haven't been checked at all
for feed in Feed.objects.filter(
feeds = Feed.objects.filter(
Q(last_checked__lte=timezone.now() - timedelta(minutes=15)) | Q(last_checked__isnull=True),
):
entries: None | list[Entry] = grab_entries(feed)
if not entries:
self.stdout.write(f"No new entries for {feed.title}")
continue
)
domain_feeds = defaultdict(list)
self.stdout.write(f"Updated {feed}")
self.stdout.write(f"Added {len(entries)} new entries for {feed}")
new_entries += len(entries)
for feed in feeds:
domain_feeds[feed.domain.pk].append(feed)
if new_entries:
self.stdout.write(self.style.SUCCESS(f"Successfully updated feeds. Added {new_entries} new entries"))
threads = []
progress = Progress()
self.stdout.write("No new entries found")
for feeds in domain_feeds.values():
thread = DomainUpdater(feeds, progress)
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
console.log("[bold green]Successfully updated feeds")

View file

@ -46,11 +46,6 @@ WSGI_APPLICATION = "feedvault.wsgi.application"
NINJA_PAGINATION_PER_PAGE = 1000
STATIC_URL = "static/"
STATIC_ROOT: Path = BASE_DIR / "staticfiles"
STATICFILES_STORAGE = (
"django.contrib.staticfiles.storage.StaticFilesStorage"
if TESTING
else "whitenoise.storage.CompressedManifestStaticFilesStorage"
)
STATIC_ROOT.mkdir(parents=True, exist_ok=True)
MEDIA_URL = "media/"
MEDIA_ROOT: Path = BASE_DIR / "media"
@ -87,11 +82,12 @@ MIDDLEWARE: list[str] = [
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases
database_folder: Path = BASE_DIR / "data"
database_folder.mkdir(parents=True, exist_ok=True)
DATABASES: dict[str, dict[str, str | Path | bool]] = {
DATABASES: dict[str, dict[str, str | Path | bool | int]] = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": database_folder / "feedvault.sqlite3",
"ATOMIC_REQUESTS": True,
"timeout": 30,
},
}
@ -184,6 +180,8 @@ STORAGES: dict[str, dict[str, str]] = {
"BACKEND": "django.core.files.storage.FileSystemStorage",
},
"staticfiles": {
"BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage",
"BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage"
if TESTING
else "whitenoise.storage.CompressedManifestStaticFilesStorage",
},
}

75
poetry.lock generated
View file

@ -600,17 +600,49 @@ six = ">=1.13.0"
[[package]]
name = "json5"
version = "0.9.22"
version = "0.9.24"
description = "A Python implementation of the JSON5 data format."
optional = false
python-versions = ">=3.8"
files = [
{file = "json5-0.9.22-py3-none-any.whl", hash = "sha256:6621007c70897652f8b5d03885f732771c48d1925591ad989aa80c7e0e5ad32f"},
{file = "json5-0.9.22.tar.gz", hash = "sha256:b729bde7650b2196a35903a597d2b704b8fdf8648bfb67368cfb79f1174a17bd"},
{file = "json5-0.9.24-py3-none-any.whl", hash = "sha256:4ca101fd5c7cb47960c055ef8f4d0e31e15a7c6c48c3b6f1473fc83b6c462a13"},
{file = "json5-0.9.24.tar.gz", hash = "sha256:0c638399421da959a20952782800e5c1a78c14e08e1dc9738fa10d8ec14d58c8"},
]
[[package]]
name = "markdown-it-py"
version = "3.0.0"
description = "Python port of markdown-it. Markdown parsing, done right!"
optional = false
python-versions = ">=3.8"
files = [
{file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
{file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
]
[package.dependencies]
mdurl = ">=0.1,<1.0"
[package.extras]
dev = ["hypothesis"]
benchmarking = ["psutil", "pytest", "pytest-benchmark"]
code-style = ["pre-commit (>=3.0,<4.0)"]
compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
linkify = ["linkify-it-py (>=1,<3)"]
plugins = ["mdit-py-plugins"]
profiling = ["gprof2dot"]
rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
[[package]]
name = "mdurl"
version = "0.1.2"
description = "Markdown URL utilities"
optional = false
python-versions = ">=3.7"
files = [
{file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
]
[[package]]
name = "packaging"
@ -755,6 +787,21 @@ files = [
[package.dependencies]
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
[[package]]
name = "pygments"
version = "2.17.2"
description = "Pygments is a syntax highlighting package written in Python."
optional = false
python-versions = ">=3.7"
files = [
{file = "pygments-2.17.2-py3-none-any.whl", hash = "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c"},
{file = "pygments-2.17.2.tar.gz", hash = "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367"},
]
[package.extras]
plugins = ["importlib-metadata"]
windows-terminal = ["colorama (>=0.4.6)"]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@ -977,6 +1024,24 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "rich"
version = "13.7.1"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
optional = false
python-versions = ">=3.7.0"
files = [
{file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"},
{file = "rich-13.7.1.tar.gz", hash = "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"},
]
[package.dependencies]
markdown-it-py = ">=2.2.0"
pygments = ">=2.13.0,<3.0.0"
[package.extras]
jupyter = ["ipywidgets (>=7.5.1,<9)"]
[[package]]
name = "ruff"
version = "0.3.3"
@ -1136,4 +1201,4 @@ brotli = ["Brotli"]
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "60d19db756f55b38f910b2167075ae87cea90478904adc217b21d746bec6b728"
content-hash = "d83aef77b2c267562ab628a098c63f81bfe05e2f381a07a25500cec6b27d6864"

View file

@ -16,6 +16,7 @@ discord-webhook = "^1.3.1"
django-ninja = "^1.1.0"
django-debug-toolbar = "^4.3.0"
whitenoise = {extras = ["brotli"], version = "^6.6.0"}
rich = "^13.7.1"
[tool.poetry.group.dev.dependencies]
ruff = "^0.3.0"