diff --git a/feedvault/add_feeds.py b/feedvault/add_feeds.py index 7bb8e96..55e40f6 100644 --- a/feedvault/add_feeds.py +++ b/feedvault/add_feeds.py @@ -1,11 +1,10 @@ from __future__ import annotations -import datetime import logging -from time import mktime, struct_time from typing import TYPE_CHECKING from urllib.parse import ParseResult, urlparse +import dateparser import feedparser from django.utils import timezone from feedparser import FeedParserDict @@ -13,6 +12,8 @@ from feedparser import FeedParserDict from feedvault.models import Author, Domain, Entry, Feed, Generator, Publisher if TYPE_CHECKING: + import datetime + from django.contrib.auth.models import AbstractBaseUser, AnonymousUser logger: logging.Logger = logging.getLogger(__name__) @@ -141,21 +142,6 @@ def parse_feed(url: str | None) -> dict | None: return parsed_feed -def struct_time_to_datetime(struct_time: struct_time | None) -> datetime.datetime | None: - """Convert a struct_time to a datetime.""" - if not struct_time: - return None - - if struct_time == "Mon, 01 Jan 0001 00:00:00 +0000": - return None - - dt: datetime.datetime = datetime.datetime.fromtimestamp(mktime(struct_time), tz=datetime.UTC) - if not dt: - logger.error("Error converting struct_time to datetime: %s", struct_time) - return None - return dt - - def add_entry(feed: Feed, entry: FeedParserDict) -> Entry | None: """Add an entry to the database. @@ -165,10 +151,25 @@ def add_entry(feed: Feed, entry: FeedParserDict) -> Entry | None: """ author: Author = get_author(parsed_feed=entry) publisher: Publisher = get_publisher(parsed_feed=entry) - updated_parsed: datetime | None = struct_time_to_datetime(struct_time=entry.get("updated_parsed")) # type: ignore # noqa: PGH003 - published_parsed: datetime | None = struct_time_to_datetime(struct_time=entry.get("published_parsed")) # type: ignore # noqa: PGH003 - expired_parsed: datetime | None = struct_time_to_datetime(struct_time=entry.get("expired_parsed")) # type: ignore # noqa: PGH003 - created_parsed: datetime | None = struct_time_to_datetime(struct_time=entry.get("created_parsed")) # type: ignore # noqa: PGH003 + pre_updated_parsed: str = str(entry.get("updated_parsed", "")) + updated_parsed: datetime.datetime | None = ( + dateparser.parse(date_string=str(pre_updated_parsed)) if pre_updated_parsed else None + ) + + pre_published_parsed: str = str(entry.get("published_parsed", "")) + published_parsed: datetime.datetime | None = ( + dateparser.parse(date_string=str(pre_published_parsed)) if pre_published_parsed else None + ) + + pre_expired_parsed: str = str(entry.get("expired_parsed", "")) + expired_parsed: datetime.datetime | None = ( + dateparser.parse(date_string=str(pre_expired_parsed)) if pre_expired_parsed else None + ) + + pre_created_parsed = str(entry.get("created_parsed", "")) + created_parsed: datetime.datetime | None = ( + dateparser.parse(date_string=str(pre_created_parsed)) if pre_created_parsed else None + ) _entry = Entry( feed=feed, @@ -201,18 +202,14 @@ def add_entry(feed: Feed, entry: FeedParserDict) -> Entry | None: ) # Save the entry. - try: - _entry.save() - except Exception: - logger.exception("Error saving entry for feed: %s", feed) - return None + _entry.save() logger.info("Created entry: %s", _entry) return _entry -def add_feed(url: str | None, user: AbstractBaseUser | AnonymousUser) -> Feed | None: +def add_feed(url: str | None, user: AbstractBaseUser | AnonymousUser) -> Feed | None: # noqa: PLR0914 """Add a feed to the database. Args: @@ -242,8 +239,18 @@ def add_feed(url: str | None, user: AbstractBaseUser | AnonymousUser) -> Feed | generator: Generator = def_generator(parsed_feed=parsed_feed) publisher: Publisher = get_publisher(parsed_feed=parsed_feed) - published_parsed: datetime | None = struct_time_to_datetime(struct_time=parsed_feed.get("published_parsed")) # type: ignore # noqa: PGH003 - updated_parsed: datetime | None = struct_time_to_datetime(struct_time=parsed_feed.get("updated_parsed")) # type: ignore # noqa: PGH003 + pre_published_parsed: str = str(parsed_feed.get("published_parsed", "")) + published_parsed: datetime.datetime | None = ( + dateparser.parse(date_string=str(pre_published_parsed)) if pre_published_parsed else None + ) + + pre_updated_parsed: str = str(parsed_feed.get("updated_parsed", "")) + updated_parsed: datetime.datetime | None = ( + dateparser.parse(date_string=str(pre_updated_parsed)) if pre_updated_parsed else None + ) + + pre_modified: str = str(parsed_feed.get("modified", "")) + modified: timezone.datetime | None = dateparser.parse(date_string=pre_modified) if pre_modified else None # Create the feed feed = Feed( @@ -257,7 +264,7 @@ def add_feed(url: str | None, user: AbstractBaseUser | AnonymousUser) -> Feed | etag=parsed_feed.get("etag", ""), headers=parsed_feed.get("headers", {}), href=parsed_feed.get("href", ""), - modified=parsed_feed.get("modified"), + modified=modified, namespaces=parsed_feed.get("namespaces", {}), status=parsed_feed.get("status", 0), version=parsed_feed.get("version", ""), @@ -296,11 +303,7 @@ def add_feed(url: str | None, user: AbstractBaseUser | AnonymousUser) -> Feed | ) # Save the feed. - try: - feed.save() - except Exception: - logger.exception("Got exception while saving feed: %s", url) - return None + feed.save() entries = parsed_feed.get("entries", []) for entry in entries: diff --git a/poetry.lock b/poetry.lock index 8d6a9ce..cb110ef 100644 --- a/poetry.lock +++ b/poetry.lock @@ -175,6 +175,28 @@ editorconfig = ">=0.12.2" jsbeautifier = "*" six = ">=1.13.0" +[[package]] +name = "dateparser" +version = "1.2.0" +description = "Date parsing library designed to parse dates from HTML pages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "dateparser-1.2.0-py2.py3-none-any.whl", hash = "sha256:0b21ad96534e562920a0083e97fd45fa959882d4162acc358705144520a35830"}, + {file = "dateparser-1.2.0.tar.gz", hash = "sha256:7975b43a4222283e0ae15be7b4999d08c9a70e2d378ac87385b1ccf2cffbbb30"}, +] + +[package.dependencies] +python-dateutil = "*" +pytz = "*" +regex = "<2019.02.19 || >2019.02.19,<2021.8.27 || >2021.8.27" +tzlocal = "*" + +[package.extras] +calendars = ["convertdate", "hijri-converter"] +fasttext = ["fasttext"] +langdetect = ["langdetect"] + [[package]] name = "django" version = "5.0.3" @@ -347,6 +369,20 @@ files = [ {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = "python-dotenv" version = "1.0.1" @@ -361,6 +397,17 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + [[package]] name = "pyyaml" version = "6.0.1" @@ -617,7 +664,24 @@ files = [ {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] +[[package]] +name = "tzlocal" +version = "5.2" +description = "tzinfo object for the local timezone" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"}, + {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"}, +] + +[package.dependencies] +tzdata = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] + [metadata] lock-version = "2.0" python-versions = "^3.12" -content-hash = "dd4d8ba16bb5e34d2e0f94009d4ea86de094a6b1d6d1af3e6b69c14e881ccf3e" +content-hash = "2617c6ec410cc30c300b46a5d653fa2a2aaa1737509851ab19b2e628b2838a65" diff --git a/pyproject.toml b/pyproject.toml index a2a4ec3..de91d65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ django = { extras = ["argon2"], version = "^5.0.3" } python-dotenv = "^1.0.1" feedparser = "^6.0.11" gunicorn = "^21.2.0" +dateparser = "^1.2.0" [tool.poetry.group.dev.dependencies] ruff = "^0.3.0"