Use one thread for each domain when grabbing entries
This commit is contained in:
parent
d04fe12f80
commit
c3ebd9faa2
4 changed files with 121 additions and 28 deletions
|
|
@ -1,13 +1,41 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from datetime import timedelta
|
||||
from threading import Thread
|
||||
|
||||
from django.core.management.base import BaseCommand, no_translations
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress
|
||||
|
||||
from feedvault.feeds import grab_entries
|
||||
from feedvault.models import Entry, Feed
|
||||
from feedvault.models import Feed
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
class DomainUpdater(Thread):
    """Worker thread that grabs entries for one group of feeds.

    The management command creates one worker per domain and hands every
    worker the same Rich ``Progress`` instance. Because the display is shared,
    a worker must not tear it down while sibling workers are still running:
    it is stopped only once every registered task has finished.
    """

    def __init__(self, feeds: list[Feed], progress: Progress, *args, **kwargs) -> None:  # noqa: ANN002, ANN003
        """Update feeds in a separate thread.

        Args:
            feeds: The feeds to update.
            progress: The Rich progress bar, shared with the other workers.
            *args: Arbitrary positional arguments, forwarded to ``Thread``.
            **kwargs: Arbitrary keyword arguments, forwarded to ``Thread``.
        """
        super().__init__(*args, **kwargs)
        self.feeds: list[Feed] = feeds
        self.progress: Progress = progress

    def run(self) -> None:
        """Grab entries for each feed in order and advance this worker's bar.

        Raises:
            Exception: Whatever ``grab_entries`` raises is re-raised after this
                worker's bar has been removed from the shared display.
        """
        if not self.feeds:
            # Nothing to do, so do not add an empty bar to the shared display.
            return

        progress: Progress = self.progress

        # Register the task before starting the display: a sibling that is
        # just finishing then sees an unfinished task and leaves the display up.
        task = progress.add_task("[cyan]Updating feeds...", total=len(self.feeds))

        # Previously each worker used ``with self.progress``; the first worker
        # to leave that block stopped the display for everyone. ``start()`` is
        # safe to call from every worker because it does nothing when the
        # display is already live.
        progress.start()

        try:
            for feed in self.feeds:
                grab_entries(feed)
                progress.update(task, advance=1, description=f"[green]Updated {feed.feed_url}")
        except Exception:
            # A bar that can never complete would keep the display open forever.
            progress.remove_task(task)
            raise
        finally:
            # Only the worker that observes every bar finished shuts the display down.
            # NOTE(review): the cleaner design is for the code that creates the
            # Progress to enter it once around start()/join(); this keeps the
            # class correct without requiring a change in its callers.
            if progress.finished:
                progress.stop()
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
|
@ -16,22 +44,23 @@ class Command(BaseCommand):
|
|||
|
||||
@no_translations
|
||||
def handle(self, *args, **options) -> None: # noqa: ANN002, ANN003, ARG002
|
||||
new_entries: int = 0
|
||||
|
||||
# Grab feeds that haven't been checked in 15 minutes OR haven't been checked at all
|
||||
for feed in Feed.objects.filter(
|
||||
feeds = Feed.objects.filter(
|
||||
Q(last_checked__lte=timezone.now() - timedelta(minutes=15)) | Q(last_checked__isnull=True),
|
||||
):
|
||||
entries: None | list[Entry] = grab_entries(feed)
|
||||
if not entries:
|
||||
self.stdout.write(f"No new entries for {feed.title}")
|
||||
continue
|
||||
)
|
||||
domain_feeds = defaultdict(list)
|
||||
|
||||
self.stdout.write(f"Updated {feed}")
|
||||
self.stdout.write(f"Added {len(entries)} new entries for {feed}")
|
||||
new_entries += len(entries)
|
||||
for feed in feeds:
|
||||
domain_feeds[feed.domain.pk].append(feed)
|
||||
|
||||
if new_entries:
|
||||
self.stdout.write(self.style.SUCCESS(f"Successfully updated feeds. Added {new_entries} new entries"))
|
||||
threads = []
|
||||
progress = Progress()
|
||||
|
||||
self.stdout.write("No new entries found")
|
||||
for feeds in domain_feeds.values():
|
||||
thread = DomainUpdater(feeds, progress)
|
||||
threads.append(thread)
|
||||
thread.start()
|
||||
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
console.log("[bold green]Successfully updated feeds")
|
||||
|
|
|
|||
|
|
@ -46,11 +46,6 @@ WSGI_APPLICATION = "feedvault.wsgi.application"
|
|||
NINJA_PAGINATION_PER_PAGE = 1000
|
||||
STATIC_URL = "static/"
|
||||
STATIC_ROOT: Path = BASE_DIR / "staticfiles"
|
||||
STATICFILES_STORAGE = (
|
||||
"django.contrib.staticfiles.storage.StaticFilesStorage"
|
||||
if TESTING
|
||||
else "whitenoise.storage.CompressedManifestStaticFilesStorage"
|
||||
)
|
||||
STATIC_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
MEDIA_URL = "media/"
|
||||
MEDIA_ROOT: Path = BASE_DIR / "media"
|
||||
|
|
@ -87,11 +82,12 @@ MIDDLEWARE: list[str] = [
|
|||
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases
|
||||
database_folder: Path = BASE_DIR / "data"
|
||||
database_folder.mkdir(parents=True, exist_ok=True)
|
||||
# Feeds are now updated from several threads at once, and SQLite allows only
# one writer at a time, so connections need a busy timeout instead of failing
# immediately with "database is locked".
DATABASES: dict[str, dict[str, str | Path | bool | int | dict[str, int]]] = {
    "default": {
        "ENGINE": "django.db.backends.sqlite3",
        "NAME": database_folder / "feedvault.sqlite3",
        "ATOMIC_REQUESTS": True,
        # Backend connection arguments must live under OPTIONS; a top-level
        # "timeout" key is not passed to sqlite3.connect(), so it had no effect.
        "OPTIONS": {
            "timeout": 30,  # seconds to wait for a lock before raising
        },
    },
}
|
||||
|
||||
|
|
@ -184,6 +180,8 @@ STORAGES: dict[str, dict[str, str]] = {
|
|||
"BACKEND": "django.core.files.storage.FileSystemStorage",
|
||||
},
|
||||
"staticfiles": {
|
||||
"BACKEND": "whitenoise.storage.CompressedManifestStaticFilesStorage",
|
||||
"BACKEND": "django.contrib.staticfiles.storage.StaticFilesStorage"
|
||||
if TESTING
|
||||
else "whitenoise.storage.CompressedManifestStaticFilesStorage",
|
||||
},
|
||||
}
|
||||
|
|
|
|||
75
poetry.lock
generated
75
poetry.lock
generated
|
|
@ -600,17 +600,49 @@ six = ">=1.13.0"
|
|||
|
||||
[[package]]
|
||||
name = "json5"
|
||||
version = "0.9.22"
|
||||
version = "0.9.24"
|
||||
description = "A Python implementation of the JSON5 data format."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "json5-0.9.22-py3-none-any.whl", hash = "sha256:6621007c70897652f8b5d03885f732771c48d1925591ad989aa80c7e0e5ad32f"},
|
||||
{file = "json5-0.9.22.tar.gz", hash = "sha256:b729bde7650b2196a35903a597d2b704b8fdf8648bfb67368cfb79f1174a17bd"},
|
||||
{file = "json5-0.9.24-py3-none-any.whl", hash = "sha256:4ca101fd5c7cb47960c055ef8f4d0e31e15a7c6c48c3b6f1473fc83b6c462a13"},
|
||||
{file = "json5-0.9.24.tar.gz", hash = "sha256:0c638399421da959a20952782800e5c1a78c14e08e1dc9738fa10d8ec14d58c8"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markdown-it-py"
|
||||
version = "3.0.0"
|
||||
description = "Python port of markdown-it. Markdown parsing, done right!"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
|
||||
{file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
mdurl = ">=0.1,<1.0"
|
||||
|
||||
[package.extras]
|
||||
dev = ["hypothesis"]
|
||||
benchmarking = ["psutil", "pytest", "pytest-benchmark"]
|
||||
code-style = ["pre-commit (>=3.0,<4.0)"]
|
||||
compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
|
||||
linkify = ["linkify-it-py (>=1,<3)"]
|
||||
plugins = ["mdit-py-plugins"]
|
||||
profiling = ["gprof2dot"]
|
||||
rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
|
||||
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
|
||||
|
||||
[[package]]
|
||||
name = "mdurl"
|
||||
version = "0.1.2"
|
||||
description = "Markdown URL utilities"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
|
||||
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
|
|
@ -755,6 +787,21 @@ files = [
|
|||
[package.dependencies]
|
||||
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.17.2"
|
||||
description = "Pygments is a syntax highlighting package written in Python."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "pygments-2.17.2-py3-none-any.whl", hash = "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c"},
|
||||
{file = "pygments-2.17.2.tar.gz", hash = "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
plugins = ["importlib-metadata"]
|
||||
windows-terminal = ["colorama (>=0.4.6)"]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
|
|
@ -977,6 +1024,24 @@ urllib3 = ">=1.21.1,<3"
|
|||
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "rich"
|
||||
version = "13.7.1"
|
||||
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
files = [
|
||||
{file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"},
|
||||
{file = "rich-13.7.1.tar.gz", hash = "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
markdown-it-py = ">=2.2.0"
|
||||
pygments = ">=2.13.0,<3.0.0"
|
||||
|
||||
[package.extras]
|
||||
jupyter = ["ipywidgets (>=7.5.1,<9)"]
|
||||
|
||||
[[package]]
|
||||
name = "ruff"
|
||||
version = "0.3.3"
|
||||
|
|
@ -1136,4 +1201,4 @@ brotli = ["Brotli"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "60d19db756f55b38f910b2167075ae87cea90478904adc217b21d746bec6b728"
|
||||
content-hash = "d83aef77b2c267562ab628a098c63f81bfe05e2f381a07a25500cec6b27d6864"
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ discord-webhook = "^1.3.1"
|
|||
django-ninja = "^1.1.0"
|
||||
django-debug-toolbar = "^4.3.0"
|
||||
whitenoise = {extras = ["brotli"], version = "^6.6.0"}
|
||||
rich = "^13.7.1"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
ruff = "^0.3.0"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue