Add initial version of feeds app
All checks were successful
Deploy to Server / deploy (push) Successful in 11s
All checks were successful
Deploy to Server / deploy (push) Successful in 11s
This commit is contained in:
parent
e889b58aec
commit
a02b5d5f66
17 changed files with 993 additions and 15 deletions
136
feeds/models.py
136
feeds/models.py
|
|
@ -0,0 +1,136 @@
|
|||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from django.contrib.postgres.indexes import GinIndex
|
||||
from django.db import models
|
||||
|
||||
# Module-level logger, registered under the explicit name "feeds.models".
logger: logging.Logger = logging.getLogger("feeds.models")
|
||||
|
||||
|
||||
class Feed(models.Model):
    """Represents the actual RSS/Atom feed URL and its metadata.

    Besides the URL itself, a row stores the HTTP validators (``etag`` and
    ``last_modified``) from earlier responses, an on/off switch for fetching,
    and bookkeeping timestamps. ``domain`` is filled in from ``url`` by
    :meth:`save` when it has not been set explicitly.
    """

    # Unique feed address; the value ``domain`` is derived from.
    url = models.URLField(
        help_text="The canonical URL of the RSS/Atom feed. Must be unique.",
        verbose_name="Feed URL",
        max_length=2048,
        unique=True,
    )
    # Indexed; populated automatically in save() when left empty.
    domain = models.CharField(
        help_text="Domain name extracted from the feed URL.",
        verbose_name="Domain",
        max_length=255,
        db_index=True,
    )
    # HTTP validators; stored as empty strings when not set.
    etag = models.CharField(
        help_text="HTTP ETag header for conditional requests.",
        verbose_name="ETag",
        max_length=255,
        blank=True,
        default="",
    )
    last_modified = models.CharField(
        help_text="HTTP Last-Modified header for conditional requests.",
        verbose_name="Last Modified",
        max_length=255,
        blank=True,
        default="",
    )
    is_active = models.BooleanField(
        help_text="Whether this feed is currently being fetched.",
        verbose_name="Is Active",
        default=True,
    )
    # Set once, when the row is first created (auto_now_add).
    created_at = models.DateTimeField(
        help_text="Timestamp when this feed was first added.",
        verbose_name="Created At",
        auto_now_add=True,
    )
    # NULL until a value is assigned.
    last_fetched_at = models.DateTimeField(
        help_text="Timestamp when this feed was last fetched.",
        verbose_name="Last Fetched At",
        blank=True,
        null=True,
    )

    class Meta:
        verbose_name = "Feed"
        verbose_name_plural = "Feeds"

    def __str__(self) -> str:
        """Return the feed URL as string representation."""
        return self.url

    def save(self, *args, **kwargs) -> None:
        """Save the feed, deriving ``domain`` from ``url`` when it is empty.

        An explicitly assigned ``domain`` is never overwritten. When the
        domain is derived here and the caller restricted the write with a
        non-empty ``update_fields``, ``"domain"`` is added to that set so the
        derived value is persisted and not just left on the instance.

        Args:
            *args: Positional arguments forwarded unchanged to
                ``models.Model.save``.
            **kwargs: Keyword arguments forwarded to ``models.Model.save``;
                ``update_fields`` may be extended as described above.
        """
        if not self.domain and self.url:
            # NOTE(review): ``netloc`` keeps any port and ``user:pass@`` part
            # of the URL. If only the bare host is wanted, ``.hostname`` is
            # the alternative; confirm against how ``domain`` is queried.
            self.domain = str(urlparse(str(self.url)).netloc)

            # An empty ``update_fields`` is left alone so it stays a no-op.
            update_fields = kwargs.get("update_fields")
            if update_fields:
                kwargs["update_fields"] = {"domain", *update_fields}

            # Logger.debug() checks the level itself and formats lazily, so
            # no explicit isEnabledFor() guard is needed.
            logger.debug(
                "Auto-populated domain '%s' for feed URL: %s",
                self.domain,
                self.url,
            )

        super().save(*args, **kwargs)
|
||||
|
||||
|
||||
class Entry(models.Model):
    """A single archived item/post that was read from a :class:`Feed`.

    Rows are unique per ``(feed, entry_id, content_hash)``, so the same
    ``entry_id`` can be stored again for a feed when its content hash
    differs. The parsed payload is kept in ``data``, which has a GIN index.
    """

    # Owning feed: deleting the feed cascades to its entries, and the
    # reverse accessor on Feed is ``entries``.
    feed = models.ForeignKey(
        to="Feed",
        on_delete=models.CASCADE,
        related_name="entries",
        verbose_name="Feed",
        help_text="The feed this entry was fetched from.",
    )

    # Identifier supplied by the feed; indexed for lookups.
    entry_id = models.CharField(
        verbose_name="Entry ID",
        help_text="Unique entry ID (guid, id, or link) from the feed.",
        max_length=512,
        db_index=True,
    )

    # Set once, when the row is first created (auto_now_add); indexed.
    fetched_at = models.DateTimeField(
        verbose_name="Fetched At",
        help_text="Timestamp when this entry was archived.",
        auto_now_add=True,
        db_index=True,
    )

    # Optional publication time; NULL when not set.
    published_at = models.DateTimeField(
        verbose_name="Published At",
        help_text="Timestamp when this entry was published (if available).",
        null=True,
        blank=True,
        db_index=True,
    )

    # NOTE(review): BigIntegerField is a signed 64-bit column, while an
    # xxhash64 digest is usually an unsigned 64-bit value; confirm that the
    # writer maps the hash into the signed range before saving.
    content_hash = models.BigIntegerField(
        verbose_name="Content Hash",
        help_text="xxhash64 integer of the entry content for deduplication.",
        db_index=True,
    )

    # Parsed payload; may be NULL.
    data = models.JSONField(
        verbose_name="Entry Data",
        help_text="Parsed entry data as JSON.",
        null=True,
        blank=True,
    )

    # Empty string when no error message is set.
    error_message = models.TextField(
        verbose_name="Error Message",
        help_text="Error message if archiving failed.",
        default="",
        blank=True,
    )

    class Meta:
        verbose_name = "Entry"
        verbose_name_plural = "Entries"
        unique_together = ("feed", "entry_id", "content_hash")
        indexes = [GinIndex(fields=["data"])]

    def __str__(self) -> str:
        """Describe the entry by feed domain, entry ID and fetch time."""
        # Reading ``self.feed`` may trigger a query if the related Feed has
        # not been loaded yet.
        return "{} entry {} at {}".format(
            self.feed.domain,
            self.entry_id,
            self.fetched_at,
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue