feedvault.se/feeds/models.py
Joakim Helleśen a02b5d5f66
All checks were successful
Deploy to Server / deploy (push) Successful in 11s
Add initial version of feeds app
2026-03-24 03:58:08 +01:00

136 lines
4 KiB
Python

import logging
from urllib.parse import urlparse
from django.contrib.postgres.indexes import GinIndex
from django.db import models
# Module-level logger, registered under the explicit name "feeds.models".
logger: logging.Logger = logging.getLogger("feeds.models")
class Feed(models.Model):
    """Represents the actual RSS/Atom feed URL and its metadata.

    Besides the URL itself, a row keeps the values needed for conditional
    HTTP requests (``etag``, ``last_modified``), an on/off switch for
    fetching (``is_active``) and bookkeeping timestamps.
    """

    # Unique per row; the database rejects a second feed with the same URL.
    url = models.URLField(
        help_text="The canonical URL of the RSS/Atom feed. Must be unique.",
        verbose_name="Feed URL",
        max_length=2048,
        unique=True,
    )
    # Filled in automatically by save() when left empty.
    domain = models.CharField(
        help_text="Domain name extracted from the feed URL.",
        verbose_name="Domain",
        max_length=255,
        db_index=True,
    )
    # The two header values below default to "" rather than NULL.
    etag = models.CharField(
        help_text="HTTP ETag header for conditional requests.",
        verbose_name="ETag",
        max_length=255,
        blank=True,
        default="",
    )
    last_modified = models.CharField(
        help_text="HTTP Last-Modified header for conditional requests.",
        verbose_name="Last Modified",
        max_length=255,
        blank=True,
        default="",
    )
    is_active = models.BooleanField(
        help_text="Whether this feed is currently being fetched.",
        verbose_name="Is Active",
        default=True,
    )
    # Set once by Django when the row is first created (auto_now_add).
    created_at = models.DateTimeField(
        help_text="Timestamp when this feed was first added.",
        verbose_name="Created At",
        auto_now_add=True,
    )
    # NULL until a value is assigned; nothing in this class sets it.
    last_fetched_at = models.DateTimeField(
        help_text="Timestamp when this feed was last fetched.",
        verbose_name="Last Fetched At",
        blank=True,
        null=True,
    )

    class Meta:
        verbose_name = "Feed"
        verbose_name_plural = "Feeds"

    def __str__(self) -> str:
        """Return the feed URL as string representation."""
        return self.url

    def save(self, *args, **kwargs) -> None:
        """Save the feed, deriving ``domain`` from ``url`` when it is empty.

        If the domain is filled in here and the caller limited the write with
        a non-empty ``update_fields`` keyword argument, ``"domain"`` is added
        to it so the derived value is persisted instead of only living on the
        in-memory instance.

        Args:
            *args: Positional arguments forwarded unchanged to
                ``models.Model.save``.
            **kwargs: Keyword arguments forwarded to ``models.Model.save``;
                ``update_fields`` may be extended as described above.
        """
        if not self.domain and self.url:
            # NOTE: netloc keeps any port or credentials present in the URL.
            self.domain = str(urlparse(str(self.url)).netloc)

            # logger.debug() checks the level itself and formats lazily, so
            # no explicit isEnabledFor() guard is needed.
            logger.debug(
                "Auto-populated domain '%s' for feed URL: %s",
                self.domain,
                self.url,
            )

            # Only the keyword form of update_fields is handled here. An
            # empty iterable is left untouched because Django treats it as
            # "skip the save", and that no-op must stay a no-op.
            update_fields = kwargs.get("update_fields")
            if update_fields:
                kwargs["update_fields"] = {"domain", *update_fields}

        super().save(*args, **kwargs)
class Entry(models.Model):
    """An archived entry (item/post) from a feed.

    Rows are unique per ``(feed, entry_id, content_hash)``, so the same
    ``entry_id`` can be stored again for a feed when its content hash differs.
    """

    # Deleting a Feed cascades to its entries; reverse accessor: feed.entries
    feed = models.ForeignKey(
        to="Feed",
        on_delete=models.CASCADE,
        related_name="entries",
        verbose_name="Feed",
        help_text="The feed this entry was fetched from.",
    )
    entry_id = models.CharField(
        verbose_name="Entry ID",
        help_text="Unique entry ID (guid, id, or link) from the feed.",
        max_length=512,
        db_index=True,
    )
    # Set once by Django when the row is created (auto_now_add).
    fetched_at = models.DateTimeField(
        verbose_name="Fetched At",
        help_text="Timestamp when this entry was archived.",
        auto_now_add=True,
        db_index=True,
    )
    published_at = models.DateTimeField(
        verbose_name="Published At",
        help_text="Timestamp when this entry was published (if available).",
        null=True,
        blank=True,
        db_index=True,
    )
    # Required: no default and not nullable.
    content_hash = models.BigIntegerField(
        verbose_name="Content Hash",
        help_text="xxhash64 integer of the entry content for deduplication.",
        db_index=True,
    )
    data = models.JSONField(
        verbose_name="Entry Data",
        help_text="Parsed entry data as JSON.",
        null=True,
        blank=True,
    )
    error_message = models.TextField(
        verbose_name="Error Message",
        help_text="Error message if archiving failed.",
        default="",
        blank=True,
    )

    class Meta:
        verbose_name = "Entry"
        verbose_name_plural = "Entries"
        unique_together = ("feed", "entry_id", "content_hash")
        # GIN index over the JSON payload column.
        indexes = [
            GinIndex(fields=["data"]),
        ]

    def __str__(self) -> str:
        """Describe the entry by feed domain, entry ID and fetch time."""
        # Reading self.feed may trigger a query if the relation is not loaded.
        feed_domain = self.feed.domain
        return f"{feed_domain} entry {self.entry_id} at {self.fetched_at}"