Add initial version of feeds app
All checks were successful
Deploy to Server / deploy (push) Successful in 11s

This commit is contained in:
Joakim Hellsén 2026-03-24 03:58:08 +01:00
commit a02b5d5f66
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
17 changed files with 993 additions and 15 deletions

3
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,3 @@
{
"python.analysis.typeCheckingMode": "standard"
}

View file

@ -4,10 +4,13 @@ import sys
from pathlib import Path
from typing import Any
import django_stubs_ext
import sentry_sdk
from dotenv import load_dotenv
from platformdirs import user_data_dir
# Enable runtime support for django-stubs generics (e.g. QuerySet[Model]).
django_stubs_ext.monkeypatch()
logger: logging.Logger = logging.getLogger("feedvault.settings")
# Load environment overrides from a .env file; verbose=True logs what was found.
load_dotenv(verbose=True)
@ -224,3 +227,6 @@ CELERY_BROKER_URL: str = REDIS_URL_CELERY
# Store Celery task results in the Django database (django-celery-results).
CELERY_RESULT_BACKEND = "django-db"
# Keep extended task metadata (name, args, worker, ...) with each stored result.
CELERY_RESULT_EXTENDED = True
# Persist periodic task schedules in the database (django-celery-beat).
CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler"
# Identifies the archiver to feed hosts and gives operators a way to reach us.
USER_AGENT = "FeedVault/1.0 (+https://feedvault.se/bot; archiving feeds; contact: Discord: TheLovinator#9276, Email: bot@feedvault.se)"
# Sent as the HTTP "From" header on outgoing feed fetches (see get_request_headers).
BOT_CONTACT_EMAIL = "bot@feedvault.se"

View file

View file

View file

@ -0,0 +1,34 @@
from typing import TYPE_CHECKING
from django.core.management.base import BaseCommand
from feeds.models import Feed
from feeds.services import fetch_and_archive_feed
if TYPE_CHECKING:
from django.core.management.base import CommandParser
class Command(BaseCommand):
    """Django management command to fetch and archive a feed by URL."""

    help = "Fetch and archive a feed by URL."

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the positional ``url`` argument."""
        parser.add_argument("url", type=str, help="Feed URL to fetch and archive.")

    def handle(self, *args, **options) -> None:  # noqa: ARG002
        """Look up (or create) the feed for the given URL and archive its entries."""
        feed_url: str = options["url"]
        feed, was_created = Feed.objects.get_or_create(url=feed_url)
        if was_created:
            self.stdout.write(self.style.SUCCESS(f"Created new feed for URL: {feed_url}"))
        archived_count: int = fetch_and_archive_feed(feed)
        if not archived_count:
            self.stdout.write(
                self.style.WARNING("\tFeed is up to date, but no new entries were archived."),
            )
            return
        suffix: str = "y" if archived_count == 1 else "ies"
        report: str = f"Archived {archived_count} new entr{suffix} for URL: {feed_url}"
        self.stdout.write(self.style.SUCCESS(report))

View file

@ -0,0 +1,186 @@
# Generated by Django 6.0.3 on 2026-03-24 01:13
import django.contrib.postgres.indexes
import django.db.models.deletion
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """Initial migration for Feed and Entry models."""

    # First migration for the app: no prior state to build on.
    initial = True

    dependencies = []

    operations = [
        # Mirrors feeds.models.Feed: the canonical feed URL plus the HTTP cache
        # validators (ETag / Last-Modified) used for conditional fetching.
        # NOTE: auto-generated file — keep in sync with the models, don't hand-edit.
        migrations.CreateModel(
            name="Feed",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "url",
                    models.URLField(
                        help_text="The canonical URL of the RSS/Atom feed. Must be unique.",
                        max_length=2048,
                        unique=True,
                        verbose_name="Feed URL",
                    ),
                ),
                (
                    "domain",
                    models.CharField(
                        db_index=True,
                        help_text="Domain name extracted from the feed URL.",
                        max_length=255,
                        verbose_name="Domain",
                    ),
                ),
                (
                    "etag",
                    models.CharField(
                        blank=True,
                        default="",
                        help_text="HTTP ETag header for conditional requests.",
                        max_length=255,
                        verbose_name="ETag",
                    ),
                ),
                (
                    "last_modified",
                    models.CharField(
                        blank=True,
                        default="",
                        help_text="HTTP Last-Modified header for conditional requests.",
                        max_length=255,
                        verbose_name="Last Modified",
                    ),
                ),
                (
                    "is_active",
                    models.BooleanField(
                        default=True,
                        help_text="Whether this feed is currently being fetched.",
                        verbose_name="Is Active",
                    ),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        help_text="Timestamp when this feed was first added.",
                        verbose_name="Created At",
                    ),
                ),
                (
                    "last_fetched_at",
                    models.DateTimeField(
                        blank=True,
                        help_text="Timestamp when this feed was last fetched.",
                        null=True,
                        verbose_name="Last Fetched At",
                    ),
                ),
            ],
            options={
                "verbose_name": "Feed",
                "verbose_name_plural": "Feeds",
            },
        ),
        # Mirrors feeds.models.Entry: one archived item per feed, deduplicated
        # by (feed, entry_id, content_hash); the GIN index supports JSON queries
        # on the raw entry data (requires PostgreSQL).
        migrations.CreateModel(
            name="Entry",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "entry_id",
                    models.CharField(
                        db_index=True,
                        help_text="Unique entry ID (guid, id, or link) from the feed.",
                        max_length=512,
                        verbose_name="Entry ID",
                    ),
                ),
                (
                    "fetched_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        db_index=True,
                        help_text="Timestamp when this entry was archived.",
                        verbose_name="Fetched At",
                    ),
                ),
                (
                    "published_at",
                    models.DateTimeField(
                        blank=True,
                        db_index=True,
                        help_text="Timestamp when this entry was published (if available).",
                        null=True,
                        verbose_name="Published At",
                    ),
                ),
                (
                    "content_hash",
                    models.BigIntegerField(
                        db_index=True,
                        help_text="xxhash64 integer of the entry content for deduplication.",
                        verbose_name="Content Hash",
                    ),
                ),
                (
                    "data",
                    models.JSONField(
                        blank=True,
                        help_text="Parsed entry data as JSON.",
                        null=True,
                        verbose_name="Entry Data",
                    ),
                ),
                (
                    "error_message",
                    models.TextField(
                        blank=True,
                        default="",
                        help_text="Error message if archiving failed.",
                        verbose_name="Error Message",
                    ),
                ),
                (
                    "feed",
                    models.ForeignKey(
                        help_text="The feed this entry was fetched from.",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="entries",
                        to="feeds.feed",
                        verbose_name="Feed",
                    ),
                ),
            ],
            options={
                "verbose_name": "Entry",
                "verbose_name_plural": "Entries",
                "indexes": [
                    django.contrib.postgres.indexes.GinIndex(
                        fields=["data"], name="feeds_entry_data_c87562_gin"
                    )
                ],
                "unique_together": {("feed", "entry_id", "content_hash")},
            },
        ),
    ]

View file

@ -0,0 +1,136 @@
import logging
from urllib.parse import urlparse
from django.contrib.postgres.indexes import GinIndex
from django.db import models
logger: logging.Logger = logging.getLogger("feeds.models")
class Feed(models.Model):
    """A single RSS/Atom feed URL together with its fetch metadata."""

    # Canonical feed location; uniqueness prevents archiving the same feed twice.
    url = models.URLField(
        verbose_name="Feed URL",
        help_text="The canonical URL of the RSS/Atom feed. Must be unique.",
        max_length=2048,
        unique=True,
    )
    # Hostname derived from ``url`` in ``save``; indexed for per-site queries.
    domain = models.CharField(
        verbose_name="Domain",
        help_text="Domain name extracted from the feed URL.",
        max_length=255,
        db_index=True,
    )
    # HTTP cache validators, echoed back on the next fetch to allow 304 replies.
    etag = models.CharField(
        verbose_name="ETag",
        help_text="HTTP ETag header for conditional requests.",
        max_length=255,
        default="",
        blank=True,
    )
    last_modified = models.CharField(
        verbose_name="Last Modified",
        help_text="HTTP Last-Modified header for conditional requests.",
        max_length=255,
        default="",
        blank=True,
    )
    is_active = models.BooleanField(
        verbose_name="Is Active",
        help_text="Whether this feed is currently being fetched.",
        default=True,
    )
    created_at = models.DateTimeField(
        verbose_name="Created At",
        help_text="Timestamp when this feed was first added.",
        auto_now_add=True,
    )
    last_fetched_at = models.DateTimeField(
        verbose_name="Last Fetched At",
        help_text="Timestamp when this feed was last fetched.",
        null=True,
        blank=True,
    )

    class Meta:
        verbose_name = "Feed"
        verbose_name_plural = "Feeds"

    def __str__(self) -> str:
        """Use the feed URL as the human-readable representation."""
        return self.url

    def save(self, *args, **kwargs) -> None:
        """Persist the feed, deriving ``domain`` from ``url`` when it is missing."""
        if self.url and not self.domain:
            self.domain = str(urlparse(str(self.url)).netloc)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "Auto-populated domain '%s' for feed URL: %s",
                    self.domain,
                    self.url,
                )
        super().save(*args, **kwargs)
class Entry(models.Model):
    """An archived entry (item/post) from a feed."""

    # Owning feed; deleting a feed cascades to its archived entries.
    feed = models.ForeignKey(
        to="Feed",
        verbose_name="Feed",
        help_text="The feed this entry was fetched from.",
        related_name="entries",
        on_delete=models.CASCADE,
    )
    # guid/id/link taken from the feed item (see services.fetch_and_archive_feed).
    entry_id = models.CharField(
        verbose_name="Entry ID",
        help_text="Unique entry ID (guid, id, or link) from the feed.",
        max_length=512,
        db_index=True,
    )
    fetched_at = models.DateTimeField(
        verbose_name="Fetched At",
        help_text="Timestamp when this entry was archived.",
        auto_now_add=True,
        db_index=True,
    )
    published_at = models.DateTimeField(
        verbose_name="Published At",
        help_text="Timestamp when this entry was published (if available).",
        null=True,
        blank=True,
        db_index=True,
    )
    # 63-bit xxhash of the parsed entry; part of the dedup key below.
    content_hash = models.BigIntegerField(
        verbose_name="Content Hash",
        help_text="xxhash64 integer of the entry content for deduplication.",
        db_index=True,
    )
    data = models.JSONField(
        verbose_name="Entry Data",
        help_text="Parsed entry data as JSON.",
        null=True,
        blank=True,
    )
    error_message = models.TextField(
        verbose_name="Error Message",
        help_text="Error message if archiving failed.",
        default="",
        blank=True,
    )

    class Meta:
        unique_together = ("feed", "entry_id", "content_hash")
        indexes = [
            GinIndex(fields=["data"]),
        ]
        verbose_name = "Entry"
        verbose_name_plural = "Entries"

    def __str__(self) -> str:
        """Summarize the entry as domain, ID, and archive time."""
        return f"{self.feed.domain} entry {self.entry_id} at {self.fetched_at}"

191
feeds/services.py Normal file
View file

@ -0,0 +1,191 @@
from typing import TYPE_CHECKING
from typing import Any
from xml.parsers.expat import ExpatError
import dateparser
import niquests
import xmltodict
import xxhash
from django.conf import settings
from django.utils import timezone
from feeds.models import Entry
if TYPE_CHECKING:
import datetime
from feeds.models import Feed
HTTP_OK = 200
HTTP_NOT_MODIFIED = 304
def extract_id(val: str | dict | None) -> str | None:
"""Extracts a string ID from a guid or id field, handling both string and dict formats.
Args:
val (str | dict | None): The value to extract the ID from, which can be a string, a dict (with possible '#text' or '@id' keys), or None
Returns:
str | None: The extracted ID as a string, or None if it cannot be extracted
"""
if isinstance(val, dict):
# RSS guid or Atom id as dict: prefer '#text', fallback to str(val)
return val.get("#text") or val.get("@id") or str(val)
return val
def fetch_and_archive_feed(feed: Feed) -> int:
    """Fetches the feed, parses entries, deduplicates, and archives new entries.

    Failures (network errors, non-200 responses, unparsable XML) are persisted
    as a single ``__error__`` sentinel Entry per feed so they are visible later;
    the feed's cache validators (etag / last_modified) are only updated on a
    successful fetch, so a transient error cannot wipe them.

    Args:
        feed: The Feed to fetch.

    Returns:
        The number of new entries archived (0 on any failure).
    """
    request_headers: dict[str, str] = get_request_headers()
    if feed.etag:
        request_headers["If-None-Match"] = feed.etag
    if feed.last_modified:
        request_headers["If-Modified-Since"] = feed.last_modified

    try:
        response: niquests.Response = niquests.get(
            feed.url,
            headers=request_headers,
            timeout=10,
        )
        # .content may still raise on a broken transfer, so read it inside the try.
        raw_xml: bytes = response.content or b""
    except niquests.exceptions.RequestException as e:
        _record_feed_error(feed, str(e))
        return 0

    feed.last_fetched_at = timezone.now()

    if response.status_code == HTTP_NOT_MODIFIED:
        feed.save(update_fields=["last_fetched_at"])
        return 0

    if response.status_code != HTTP_OK:
        # Previously this path silently overwrote etag/last_modified with ""
        # and recorded nothing; now the failure is persisted instead.
        feed.save(update_fields=["last_fetched_at"])
        _record_feed_error(feed, f"HTTP {response.status_code} while fetching feed.")
        return 0

    try:
        parsed_data: dict[str, Any] = xmltodict.parse(
            raw_xml.decode("utf-8", errors="replace"),
            process_namespaces=False,
        )
    except ExpatError as e:
        # Previously the parse error message was computed but never stored,
        # because with no entries no Entry row was ever created.
        feed.save(update_fields=["last_fetched_at"])
        _record_feed_error(feed, f"XML Parsing Error: {e!s}")
        return 0

    entries: list[dict[str, Any]] = extract_feed_entries(parsed_data)
    new_count = 0
    for entry in entries:
        content_hash: int = calculate_content_hash(entry)
        entry_id: str = (
            extract_id(entry.get("guid"))
            or extract_id(entry.get("id"))
            or entry.get("link")
            or str(content_hash)
        )
        if not isinstance(entry_id, str):
            entry_id = str(entry_id)
        published_at: datetime.datetime | None = None
        for date_field in ("published", "pubDate", "updated", "created"):
            if entry.get(date_field):
                published_at = dateparser.parse(entry[date_field])
                if published_at:
                    break
        # Deduplicate: skip if entry with same feed+entry_id+content_hash exists
        exists: bool = Entry.objects.filter(
            feed=feed,
            entry_id=entry_id,
            content_hash=content_hash,
        ).exists()
        if not exists:
            # fetched_at is auto_now_add, so Django fills it in on create.
            Entry.objects.create(
                feed=feed,
                entry_id=entry_id,
                published_at=published_at,
                content_hash=content_hash,
                data=entry,
                error_message="",
            )
            new_count += 1

    # Successful fetch: remember the validators for the next conditional request.
    feed.etag = response.headers.get("ETag", "")
    feed.last_modified = response.headers.get("Last-Modified", "")
    feed.save()
    return new_count


def _record_feed_error(feed: Feed, message: str) -> None:
    """Persist (or refresh) the sentinel Entry describing a failed fetch.

    Uses get_or_create keyed on (feed, "__error__", 0): the previous
    unconditional create() violated the model's unique_together constraint
    on the second failure for the same feed.
    """
    error_entry, created = Entry.objects.get_or_create(
        feed=feed,
        entry_id="__error__",
        content_hash=0,
        defaults={"published_at": None, "data": None, "error_message": message},
    )
    if not created:
        error_entry.error_message = message
        error_entry.save(update_fields=["error_message"])
def calculate_content_hash(entry: dict[str, Any]) -> int:
    """Calculates a content hash for the entry using xxhash64.

    Args:
        entry (dict[str, Any]): The entry data as a dictionary.

    Returns:
        int: A 64-bit integer hash of the entry content, suitable for deduplication.
    """
    digest: int = xxhash.xxh64_intdigest(str(entry).encode("utf-8"))
    # Clear the sign bit so the value fits the signed BigIntegerField column.
    return digest & 0x7FFFFFFFFFFFFFFF
def extract_feed_entries(parsed_data: dict[str, Any] | None) -> list[dict[str, Any]]:
"""Extracts a list of entries from the parsed feed data, handling both RSS and Atom formats.
Args:
parsed_data (dict[str, Any] | None): The parsed feed data as a dictionary, or None if parsing failed
Returns:
list[dict[str, Any]]: A list of entries extracted from the feed, where each entry is represented as a dictionary. If no entries are found or if parsed_data is None, an empty list is returned.
"""
entries: list[dict[str, Any]] = []
if parsed_data:
# RSS: channel > item; Atom: feed > entry
items: list[dict[str, Any]] | dict[str, Any] = []
if "rss" in parsed_data:
items = parsed_data["rss"].get("channel", {}).get("item", [])
elif "feed" in parsed_data:
items = parsed_data["feed"].get("entry", [])
if isinstance(items, dict):
items = [items]
entries = items
return entries
def get_request_headers() -> dict[str, str]:
    """Build the standard HTTP headers sent with every feed fetch.

    Returns:
        dict[str, str]: Headers identifying the bot and its contact address.
    """
    # https://blog.cloudflare.com/verified-bots-with-cryptography/
    # https://www.cloudflare.com/lp/verified-bots/
    # TODO(TheLovinator): We have to sign our requests # noqa: TD003
    return {
        "User-Agent": settings.USER_AGENT,
        "From": settings.BOT_CONTACT_EMAIL,
    }

24
feeds/tasks.py Normal file
View file

@ -0,0 +1,24 @@
from celery import shared_task
from feeds.models import Feed
from feeds.services import fetch_and_archive_feed
@shared_task
def archive_feed_task(feed_id: int) -> str:
    """Celery task to fetch and archive a feed by its ID.

    Args:
        feed_id: The ID of the Feed to archive.

    Returns:
        A message indicating the result of the archiving process.
    """
    try:
        feed: Feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        return f"Feed with id {feed_id} does not exist."
    archived_count: int = fetch_and_archive_feed(feed)
    if archived_count <= 0:
        return f"No new entries archived for {feed.url}"
    return f"Archived {archived_count} new entries for {feed.url}"

1
feeds/tests/__init__.py Normal file
View file

@ -0,0 +1 @@
# This file marks the directory as a Python package.

View file

@ -0,0 +1,117 @@
import os
import threading
from http.server import HTTPServer
from http.server import SimpleHTTPRequestHandler
from pathlib import Path
from typing import TYPE_CHECKING
import pytest
from feeds.models import Entry
from feeds.models import Feed
from feeds.services import fetch_and_archive_feed
if TYPE_CHECKING:
from pathlib import Path
@pytest.mark.django_db
def test_entry_id_string_guid_dict(tmp_path: Path) -> None:
    """Test that entry_id is always a string, even if guid is a dict."""
    # Prepare a fake RSS feed with guid as dict (attributes)
    feed_content = """
<rss version="2.0">
<channel>
<title>Test Feed</title>
<link>http://example.com/</link>
<description>Test feed description</description>
<item>
<title>Item 1</title>
<link>http://example.com/item1</link>
<guid isPermaLink="true">http://example.com/item1</guid>
</item>
</channel>
</rss>
"""
    feed_path: Path = tmp_path / "test_feed.xml"
    feed_path.write_text(feed_content, encoding="utf-8")
    # SimpleHTTPRequestHandler serves the cwd; remember the original so it can
    # be restored afterwards (the old version leaked the chdir to later tests).
    original_cwd: str = os.getcwd()
    os.chdir(tmp_path)
    server = HTTPServer(("localhost", 0), SimpleHTTPRequestHandler)
    port: int = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        url: str = f"http://localhost:{port}/test_feed.xml"
        feed: Feed = Feed.objects.create(url=url, domain="localhost")
        fetch_and_archive_feed(feed)
        entry: Entry | None = Entry.objects.filter(feed=feed).first()
        assert entry is not None
        assert isinstance(entry.entry_id, str)
        assert entry.entry_id == "http://example.com/item1"
    finally:
        # Stop the server even when an assertion fails so the daemon thread,
        # the listening socket, and the changed cwd do not leak.
        server.shutdown()
        server.server_close()
        thread.join()
        os.chdir(original_cwd)
@pytest.mark.django_db
def test_entry_id_string_guid_string(tmp_path: Path) -> None:
    """Test that entry_id is a string when guid is a plain string."""
    feed_content = """
<rss version="2.0">
<channel>
<title>Test Feed</title>
<link>http://example.com/</link>
<description>Test feed description</description>
<item>
<title>Item 2</title>
<link>http://example.com/item2</link>
<guid>http://example.com/item2</guid>
</item>
</channel>
</rss>
"""
    feed_path: Path = tmp_path / "test_feed.xml"
    feed_path.write_text(feed_content, encoding="utf-8")
    # Serve tmp_path via cwd; restore the original cwd in the finally block.
    original_cwd: str = os.getcwd()
    os.chdir(tmp_path)
    server = HTTPServer(("localhost", 0), SimpleHTTPRequestHandler)
    port: int = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        url: str = f"http://localhost:{port}/test_feed.xml"
        feed: Feed = Feed.objects.create(url=url, domain="localhost")
        fetch_and_archive_feed(feed)
        entry: Entry | None = Entry.objects.filter(feed=feed).first()
        assert entry is not None
        assert isinstance(entry.entry_id, str)
        assert entry.entry_id == "http://example.com/item2"
    finally:
        # Guaranteed cleanup: previously a failed assert leaked the server
        # thread, socket, and chdir into subsequent tests.
        server.shutdown()
        server.server_close()
        thread.join()
        os.chdir(original_cwd)
@pytest.mark.django_db
def test_entry_id_fallback_to_link(tmp_path: Path) -> None:
    """Test that entry_id falls back to link if guid/id missing."""
    feed_content = """
<rss version="2.0">
<channel>
<title>Test Feed</title>
<link>http://example.com/</link>
<description>Test feed description</description>
<item>
<title>Item 3</title>
<link>http://example.com/item3</link>
</item>
</channel>
</rss>
"""
    feed_path: Path = tmp_path / "test_feed.xml"
    feed_path.write_text(feed_content, encoding="utf-8")
    # Serve tmp_path via cwd; restore the original cwd in the finally block.
    original_cwd: str = os.getcwd()
    os.chdir(tmp_path)
    server = HTTPServer(("localhost", 0), SimpleHTTPRequestHandler)
    port: int = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        url: str = f"http://localhost:{port}/test_feed.xml"
        feed: Feed = Feed.objects.create(url=url, domain="localhost")
        fetch_and_archive_feed(feed)
        entry: Entry | None = Entry.objects.filter(feed=feed).first()
        assert entry is not None
        assert isinstance(entry.entry_id, str)
        assert entry.entry_id == "http://example.com/item3"
    finally:
        # Guaranteed cleanup even on assertion failure (no leaked server/cwd).
        server.shutdown()
        server.server_close()
        thread.join()
        os.chdir(original_cwd)

View file

@ -0,0 +1,111 @@
import os
import threading
from http.server import HTTPServer
from http.server import SimpleHTTPRequestHandler
from pathlib import Path
from typing import TYPE_CHECKING
import pytest
from feeds.models import Entry
from feeds.models import Feed
from feeds.services import fetch_and_archive_feed
if TYPE_CHECKING:
from pathlib import Path
@pytest.mark.django_db
def test_entry_id_id_dict(tmp_path: Path) -> None:
    """Test that entry_id is a string when id is a dict."""
    feed_content = """
<feed xmlns='http://www.w3.org/2005/Atom'>
<title>Test Atom Feed</title>
<id>http://example.com/feed</id>
<entry>
<title>Entry 1</title>
<id scheme='urn:uuid'>urn:uuid:1234</id>
<link href='http://example.com/entry1'/>
</entry>
</feed>
"""
    feed_path: Path = tmp_path / "test_feed.xml"
    feed_path.write_text(feed_content, encoding="utf-8")
    # Serve tmp_path via cwd; restore the original cwd in the finally block.
    original_cwd: str = os.getcwd()
    os.chdir(tmp_path)
    server = HTTPServer(("localhost", 0), SimpleHTTPRequestHandler)
    port: int = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        url: str = f"http://localhost:{port}/test_feed.xml"
        feed: Feed = Feed.objects.create(url=url, domain="localhost")
        fetch_and_archive_feed(feed)
        entry: Entry | None = Entry.objects.filter(feed=feed).first()
        assert entry is not None
        assert isinstance(entry.entry_id, str)
        assert "urn:uuid:1234" in entry.entry_id
    finally:
        # Guaranteed cleanup even on assertion failure (no leaked server/cwd).
        server.shutdown()
        server.server_close()
        thread.join()
        os.chdir(original_cwd)
@pytest.mark.django_db
def test_entry_id_all_fields_missing(tmp_path: Path) -> None:
    """Test that entry_id falls back to content_hash if guid/id/link missing."""
    feed_content = """
<rss version='2.0'>
<channel>
<title>Test Feed</title>
<item>
<title>Item with no id</title>
</item>
</channel>
</rss>
"""
    feed_path: Path = tmp_path / "test_feed.xml"
    feed_path.write_text(feed_content, encoding="utf-8")
    # Serve tmp_path via cwd; restore the original cwd in the finally block.
    original_cwd: str = os.getcwd()
    os.chdir(tmp_path)
    server = HTTPServer(("localhost", 0), SimpleHTTPRequestHandler)
    port: int = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        url: str = f"http://localhost:{port}/test_feed.xml"
        feed: Feed = Feed.objects.create(url=url, domain="localhost")
        fetch_and_archive_feed(feed)
        entry: Entry | None = Entry.objects.filter(feed=feed).first()
        assert entry is not None
        assert isinstance(entry.entry_id, str)
        # Should be a hash string (digits only)
        assert entry.entry_id.isdigit() or entry.entry_id.lstrip("-").isdigit()
    finally:
        # Guaranteed cleanup even on assertion failure (no leaked server/cwd).
        server.shutdown()
        server.server_close()
        thread.join()
        os.chdir(original_cwd)
@pytest.mark.django_db
def test_entry_id_malformed_guid(tmp_path: Path) -> None:
    """Test that entry_id handles malformed guid/id gracefully."""
    feed_content = """
<rss version='2.0'>
<channel>
<title>Test Feed</title>
<item>
<title>Malformed guid</title>
<guid></guid>
</item>
</channel>
</rss>
"""
    feed_path: Path = tmp_path / "test_feed.xml"
    feed_path.write_text(feed_content, encoding="utf-8")
    # Serve tmp_path via cwd; restore the original cwd in the finally block.
    original_cwd: str = os.getcwd()
    os.chdir(tmp_path)
    server = HTTPServer(("localhost", 0), SimpleHTTPRequestHandler)
    port: int = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        url: str = f"http://localhost:{port}/test_feed.xml"
        feed: Feed = Feed.objects.create(url=url, domain="localhost")
        fetch_and_archive_feed(feed)
        entry: Entry | None = Entry.objects.filter(feed=feed).first()
        assert entry is not None
        assert isinstance(entry.entry_id, str)
        # Should fallback to content_hash
        assert entry.entry_id.isdigit() or entry.entry_id.lstrip("-").isdigit()
    finally:
        # Guaranteed cleanup even on assertion failure (no leaked server/cwd).
        server.shutdown()
        server.server_close()
        thread.join()
        os.chdir(original_cwd)

View file

@ -0,0 +1,64 @@
import os
import threading
from http.server import HTTPServer
from http.server import SimpleHTTPRequestHandler
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from pathlib import Path
import pytest
from feeds.models import Entry
from feeds.models import Feed
from feeds.services import fetch_and_archive_feed
@pytest.mark.django_db
def test_fetch_and_archive_feed_xml(tmp_path: Path) -> None:
    """Test fetching and archiving a simple XML feed using a local HTTP server."""
    # Use a local test XML file as a feed source
    test_feed_path: Path = tmp_path / "test_feed.xml"
    test_feed_path.write_text(
        encoding="utf-8",
        data="""
<rss version='2.0'>
<channel>
<title>Test Feed</title>
<link>http://example.com/</link>
<description>Test feed description</description>
<item>
<title>Item 1</title>
<link>http://example.com/item1</link>
<description>Item 1 description</description>
</item>
</channel>
</rss>
""",
    )
    # Serve the file using a simple HTTP server; the handler serves the cwd,
    # so remember the original directory and restore it afterwards.
    original_cwd: str = os.getcwd()
    os.chdir(tmp_path)
    server = HTTPServer(("localhost", 0), SimpleHTTPRequestHandler)
    port: int = server.server_address[1]
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    try:
        url: str = f"http://localhost:{port}/test_feed.xml"
        feed: Feed = Feed.objects.create(url=url, domain="localhost")
        new_entries: int = fetch_and_archive_feed(feed)
        assert new_entries == 1
        # Check that the entry was archived and contains the expected data
        entry: Entry | None = Entry.objects.filter(feed=feed).first()
        assert entry is not None
        assert entry.data is not None
        assert entry.data["title"] == "Item 1"
        assert Entry.objects.filter(feed=feed).count() == 1
    finally:
        # Clean up even when an assertion fails: stop the server, close its
        # socket, wait for the thread, and restore the working directory.
        server.shutdown()
        server.server_close()
        thread.join()
        os.chdir(original_cwd)

39
feeds/tests/twitch-campaigns.xml vendored Normal file

File diff suppressed because one or more lines are too long

View file

@ -1,26 +1,15 @@
from typing import TYPE_CHECKING
from django.http import HttpResponse
from django.urls import path
from . import views
if TYPE_CHECKING:
from django.http import HttpRequest
from django.urls import URLPattern
from django.urls import URLResolver
def index(request: HttpRequest) -> HttpResponse:
    """View for the index page.

    Args:
        request: The HTTP request object.

    Returns:
        HttpResponse: A simple HTTP response with a greeting message.
    """
    # NOTE(review): urlpatterns below also maps "" to views.feed_list; Django
    # resolves to the first match — presumably this placeholder view was meant
    # to be removed when feed_list was added. TODO confirm.
    return HttpResponse("Hello, world!")
urlpatterns: list[URLPattern | URLResolver] = [
    # NOTE(review): "" is registered twice (index and views.feed_list); Django
    # uses the first match, so the "feed-list" route is unreachable as written —
    # presumably one of the two was meant to be removed. TODO confirm.
    path("", index, name="index"),
    path("", views.feed_list, name="feed-list"),
    path("feeds/<int:feed_id>/", views.feed_detail, name="feed-detail"),
]

View file

@ -0,0 +1,70 @@
from typing import TYPE_CHECKING
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from feeds.models import Entry
from feeds.models import Feed
if TYPE_CHECKING:
from django.http import HttpRequest
from pytest_django.asserts import QuerySet
def feed_list(request: HttpRequest) -> HttpResponse:
    """View to list all feeds.

    Args:
        request (HttpRequest): The HTTP request object.

    Returns:
        HttpResponse: An HTML response containing the list of feeds.
    """
    from html import escape  # local import: stdlib escaping for untrusted URLs

    feeds = Feed.objects.all().order_by("id")
    html = [
        "<!DOCTYPE html>",
        "<html><head><title>FeedVault - Feeds</title></head><body>",
        "<h1>Feed List</h1>",
        "<ul>",
    ]
    # Feed URLs come from the open web: escape them so a crafted URL cannot
    # inject markup or script into the page (stored XSS).
    html.extend(
        f'<li><a href="/feeds/{feed.pk}/">{escape(feed.url)}</a></li>' for feed in feeds
    )
    html.extend(("</ul>", "</body></html>"))
    return HttpResponse("\n".join(html))
def feed_detail(request: HttpRequest, feed_id: int) -> HttpResponse:
    """View to display the details of a specific feed.

    Args:
        request (HttpRequest): The HTTP request object.
        feed_id (int): The ID of the feed to display.

    Returns:
        HttpResponse: An HTML response containing the feed details and its entries.
    """
    from html import escape  # local import: stdlib escaping for untrusted content

    feed: Feed = get_object_or_404(Feed, id=feed_id)
    # Annotation dropped: the previous QuerySet annotation came from a bogus
    # TYPE_CHECKING import (pytest_django.asserts does not export QuerySet).
    entries = Entry.objects.filter(feed=feed).order_by(
        "-published_at",
        "-fetched_at",
    )[:50]
    html: list[str] = [
        "<!DOCTYPE html>",
        f"<html><head><title>FeedVault - {escape(feed.url)}</title></head><body>",
        "<h1>Feed Detail</h1>",
        f"<p><b>URL:</b> {escape(feed.url)}</p>",
        f"<p><b>Domain:</b> {escape(feed.domain)}</p>",
        f"<p><b>Active:</b> {'yes' if feed.is_active else 'no'}</p>",
        f"<p><b>Created:</b> {feed.created_at}</p>",
        f"<p><b>Last fetched:</b> {feed.last_fetched_at}</p>",
        "<h2>Entries (latest 50)</h2>",
        "<ul>",
    ]
    for entry in entries:
        title: str | None = entry.data.get("title") if entry.data else None
        summary: str | None = entry.data.get("summary") if entry.data else None
        snippet: str = title or summary or "[no title]"
        # Entry data is untrusted remote feed content; escape before embedding
        # (str() first — xmltodict can yield a dict for an attributed element).
        html.append(
            f"<li><b>{entry.published_at or entry.fetched_at}:</b> {escape(str(snippet))} <small>(id: {escape(entry.entry_id)})</small></li>",
        )
    html.extend(("</ul>", '<p><a href="/">Back to list</a></p>', "</body></html>"))
    return HttpResponse("\n".join(html))

View file

@ -11,18 +11,24 @@ dependencies = [
"django-celery-results",
"django-debug-toolbar",
"django-silk[formatting]",
"django-stubs-ext",
"django",
"flower",
"gunicorn",
"hiredis",
"index-now-for-python",
"niquests",
"platformdirs",
"psycopg[binary]",
"pydantic",
"python-dotenv",
"redis",
"sentry-sdk",
"setproctitle",
"sitemap-parser",
"xmltodict",
"dateparser>=1.3.0",
"xxhash>=3.6.0",
]
[dependency-groups]
@ -36,6 +42,7 @@ dev = [
"pytest-randomly",
"pytest-xdist[psutil]",
"pytest",
"types-xmltodict",
]
[tool.pytest.ini_options]
DJANGO_SETTINGS_MODULE = "config.settings"