From b19927af0f0cbf1dd2faa41246c35c1a6e438fca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Helle=C5=9Ben?=
Date: Sat, 14 Mar 2026 05:26:48 +0100
Subject: [PATCH] Preserve Discord timestamp tags in message

---
Review notes (ignored by `git am`, not part of the commit message):

- Angle-bracketed text was missing from the reviewed copy of this patch: the
  author's e-mail address, the DISCORD_TIMESTAMP_TAG_RE pattern, the
  TIMESTAMP_FORMATS values, the invalid-style example, the <p> wrappers and
  the &lt;/&gt; entities in the tests. The e-mail address is still missing;
  the other values are reconstructions - verify them against the original
  commit before applying. The blob ids on the `index` lines predate the
  changes listed below.
- _restore_discord_timestamp_tags now restores placeholders in reverse order,
  so that "...PLACEHOLDER1" cannot clobber "...PLACEHOLDER10" and later.
- format_entry_html_for_discord now strips "[https://www." before "[https://";
  in the previous order the "www." replacement could never match.
- Added one regression test for each of the two fixes.

 discord_rss_bot/custom_message.py | 120 ++++++++++++++----------
 tests/test_custom_message.py      | 163 ++++++++++++++++++++++++++++++
 2 files changed, 235 insertions(+), 48 deletions(-)
 create mode 100644 tests/test_custom_message.py

diff --git a/discord_rss_bot/custom_message.py b/discord_rss_bot/custom_message.py
index 058b275..b84b30f 100644
--- a/discord_rss_bot/custom_message.py
+++ b/discord_rss_bot/custom_message.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import html
 import json
 import logging
+import re
 from dataclasses import dataclass
 
 from bs4 import BeautifulSoup
@@ -18,6 +19,8 @@ from discord_rss_bot.settings import get_reader
 
 logger: logging.Logger = logging.getLogger(__name__)
 
+DISCORD_TIMESTAMP_TAG_RE: re.Pattern[str] = re.compile(r"<t:\d+(?::[tTdDfFsSR])?>")
+
 
 @dataclass(slots=True)
 class CustomEmbed:
@@ -51,6 +54,71 @@ def try_to_replace(custom_message: str, template: str, replace_with: str) -> str
     return custom_message
 
 
+def _preserve_discord_timestamp_tags(text: str) -> tuple[str, dict[str, str]]:
+    """Replace Discord timestamp tags with placeholders before markdown conversion.
+
+    Args:
+        text: The text to replace tags in.
+
+    Returns:
+        The text with Discord timestamp tags replaced by placeholders and a mapping of placeholders to original tags.
+    """
+    replacements: dict[str, str] = {}
+
+    def replace_match(match: re.Match[str]) -> str:
+        placeholder: str = f"DISCORDTIMESTAMPPLACEHOLDER{len(replacements)}"
+        replacements[placeholder] = match.group(0)
+        return placeholder
+
+    return DISCORD_TIMESTAMP_TAG_RE.sub(replace_match, text), replacements
+
+
+def _restore_discord_timestamp_tags(text: str, replacements: dict[str, str]) -> str:
+    """Restore preserved Discord timestamp tags after markdown conversion.
+
+    Args:
+        text: The text to restore tags in.
+        replacements: A mapping of placeholders to original Discord timestamp tags.
+
+    Returns:
+        The text with placeholders replaced by the original Discord timestamp tags.
+    """
+    # Restore the highest-numbered placeholders first: "...PLACEHOLDER1" is a prefix of
+    # "...PLACEHOLDER10", so restoring in insertion order would corrupt the eleventh tag onwards.
+    for placeholder, original_value in reversed(replacements.items()):
+        text = text.replace(placeholder, original_value)
+    return text
+
+
+def format_entry_html_for_discord(text: str) -> str:
+    """Convert entry HTML to Discord-friendly markdown while preserving Discord timestamp tags.
+
+    Args:
+        text: The HTML text to format.
+
+    Returns:
+        The formatted text with Discord timestamp tags preserved.
+    """
+    if not text:
+        return ""
+
+    unescaped_text: str = html.unescape(text)
+    protected_text, replacements = _preserve_discord_timestamp_tags(unescaped_text)
+    formatted_text: str = markdownify(
+        html=protected_text,
+        strip=["img", "table", "td", "tr", "tbody", "thead"],
+        escape_misc=False,
+        heading_style="ATX",
+    )
+
+    # Strip the longer "[https://www." prefix first; once "[https://" has been removed the
+    # "www." variant can never match.
+    formatted_text = formatted_text.replace("[https://www.", "[")
+    formatted_text = formatted_text.replace("[https://", "[")
+
+    return _restore_discord_timestamp_tags(formatted_text, replacements)
+
+
 def replace_tags_in_text_message(entry: Entry) -> str:
     """Replace tags in custom_message.
 
@@ -73,30 +141,8 @@ def replace_tags_in_text_message(entry: Entry) -> str:
 
     first_image: str = get_first_image(summary, content)
 
-    # Unescape HTML entities (e.g., &lt;h1&gt; becomes <h1>) before converting to markdown
-    summary = html.unescape(summary)
-    content = html.unescape(content)
-
-    summary = markdownify(
-        html=summary,
-        strip=["img", "table", "td", "tr", "tbody", "thead"],
-        escape_misc=False,
-        heading_style="ATX",
-    )
-    content = markdownify(
-        html=content,
-        strip=["img", "table", "td", "tr", "tbody", "thead"],
-        escape_misc=False,
-        heading_style="ATX",
-    )
-
-    if "[https://" in content or "[https://www." in content:
-        content = content.replace("[https://", "[")
-        content = content.replace("[https://www.", "[")
-
-    if "[https://" in summary or "[https://www." in summary:
-        summary = summary.replace("[https://", "[")
-        summary = summary.replace("[https://www.", "[")
+    summary = format_entry_html_for_discord(summary)
+    content = format_entry_html_for_discord(content)
 
     feed_added: str = feed.added.strftime("%Y-%m-%d %H:%M:%S") if feed.added else "Never"
     feed_last_exception: str = feed.last_exception.value_str if feed.last_exception else ""
@@ -208,30 +254,8 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed:
 
     first_image: str = get_first_image(summary, content)
 
-    # Unescape HTML entities (e.g., &lt;h1&gt; becomes <h1>) before converting to markdown
-    summary = html.unescape(summary)
-    content = html.unescape(content)
-
-    summary = markdownify(
-        html=summary,
-        strip=["img", "table", "td", "tr", "tbody", "thead"],
-        escape_misc=False,
-        heading_style="ATX",
-    )
-    content = markdownify(
-        html=content,
-        strip=["img", "table", "td", "tr", "tbody", "thead"],
-        escape_misc=False,
-        heading_style="ATX",
-    )
-
-    if "[https://" in content or "[https://www." in content:
-        content = content.replace("[https://", "[")
-        content = content.replace("[https://www.", "[")
-
-    if "[https://" in summary or "[https://www." in summary:
-        summary = summary.replace("[https://", "[")
-        summary = summary.replace("[https://www.", "[")
+    summary = format_entry_html_for_discord(summary)
+    content = format_entry_html_for_discord(content)
 
     feed_added: str = feed.added.strftime("%Y-%m-%d %H:%M:%S") if feed.added else "Never"
     feed_last_updated: str = feed.last_updated.strftime("%Y-%m-%d %H:%M:%S") if feed.last_updated else "Never"
diff --git a/tests/test_custom_message.py b/tests/test_custom_message.py
new file mode 100644
index 0000000..6fc4d41
--- /dev/null
+++ b/tests/test_custom_message.py
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+import typing
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+
+from discord_rss_bot.custom_message import CustomEmbed
+from discord_rss_bot.custom_message import format_entry_html_for_discord
+from discord_rss_bot.custom_message import replace_tags_in_embed
+from discord_rss_bot.custom_message import replace_tags_in_text_message
+
+if typing.TYPE_CHECKING:
+    from reader import Entry
+
+# https://docs.discord.com/developers/reference#message-formatting
+TIMESTAMP_FORMATS: tuple[str, ...] = (
+    "<t:1773444600>",
+    "<t:1773444600:t>",
+    "<t:1773444600:T>",
+    "<t:1773444600:d>",
+    "<t:1773444600:D>",
+    "<t:1773444600:f>",
+    "<t:1773444600:F>",
+    "<t:1773444600:s>",
+    "<t:1773444600:S>",
+    "<t:1773444600:R>",
+)
+
+
+def make_feed() -> SimpleNamespace:
+    return SimpleNamespace(
+        added=None,
+        author="Feed Author",
+        last_exception=None,
+        last_updated=None,
+        link="https://example.com/feed",
+        subtitle="",
+        title="Example Feed",
+        updated=None,
+        updates_enabled=True,
+        url="https://example.com/feed.xml",
+        user_title="",
+        version="atom10",
+    )
+
+
+def make_entry(summary: str) -> SimpleNamespace:
+    feed: SimpleNamespace = make_feed()
+    return SimpleNamespace(
+        added=None,
+        author="Entry Author",
+        content=[],
+        feed=feed,
+        feed_url=feed.url,
+        id="entry-1",
+        important=False,
+        link="https://example.com/entry-1",
+        published=None,
+        read=False,
+        read_modified=None,
+        summary=summary,
+        title="Entry Title",
+        updated=None,
+    )
+
+
+@pytest.mark.parametrize("timestamp_tag", TIMESTAMP_FORMATS)
+def test_format_entry_html_for_discord_preserves_timestamp_tags(timestamp_tag: str) -> None:
+    escaped_timestamp_tag: str = timestamp_tag.replace("<", "&lt;").replace(">", "&gt;")
+    html_summary: str = f"<p>Starts: 2026-03-13 23:30 UTC ({escaped_timestamp_tag})</p>"
+
+    rendered: str = format_entry_html_for_discord(html_summary)
+
+    assert timestamp_tag in rendered
+    assert "DISCORDTIMESTAMPPLACEHOLDER" not in rendered
+
+
+def test_format_entry_html_for_discord_empty_text_returns_empty_string() -> None:
+    rendered: str = format_entry_html_for_discord("")
+    assert not rendered
+
+
+def test_format_entry_html_for_discord_cleans_markdownified_https_link_text() -> None:
+    html_summary: str = "[https://example.com](https://example.com)"
+
+    rendered: str = format_entry_html_for_discord(html_summary)
+
+    assert "[example.com](https://example.com)" in rendered
+    assert "[https://example.com]" not in rendered
+
+
+def test_format_entry_html_for_discord_strips_www_from_link_text() -> None:
+    html_summary: str = "[https://www.example.com](https://www.example.com)"
+
+    rendered: str = format_entry_html_for_discord(html_summary)
+
+    assert "[example.com](https://www.example.com)" in rendered
+
+
+def test_format_entry_html_for_discord_does_not_preserve_invalid_timestamp_style() -> None:
+    invalid_timestamp: str = "<t:1773444600:Z>"
+    html_summary: str = f"<p>Invalid style ({invalid_timestamp.replace('<', '&lt;').replace('>', '&gt;')})</p>"
+
+    rendered: str = format_entry_html_for_discord(html_summary)
+
+    assert invalid_timestamp not in rendered
+
+
+def test_format_entry_html_for_discord_preserves_more_than_ten_timestamp_tags() -> None:
+    timestamp_tags: list[str] = [f"<t:{1773444600 + index}:R>" for index in range(12)]
+    escaped_tags: str = " ".join(tag.replace("<", "&lt;").replace(">", "&gt;") for tag in timestamp_tags)
+
+    rendered: str = format_entry_html_for_discord(f"<p>{escaped_tags}</p>")
+
+    for timestamp_tag in timestamp_tags:
+        assert timestamp_tag in rendered
+    assert "DISCORDTIMESTAMPPLACEHOLDER" not in rendered
+
+
+@patch("discord_rss_bot.custom_message.get_custom_message")
+@patch("discord_rss_bot.custom_message.get_reader")
+def test_replace_tags_in_text_message_preserves_timestamp_tags(
+    mock_get_reader: MagicMock,
+    mock_get_custom_message: MagicMock,
+) -> None:
+    mock_get_reader.return_value = MagicMock()
+    mock_get_custom_message.return_value = "{{entry_summary}}"
+    summary_parts: list[str] = [
+        f"<p>Format {index}: ({timestamp_tag.replace('<', '&lt;').replace('>', '&gt;')})</p>"
+        for index, timestamp_tag in enumerate(TIMESTAMP_FORMATS, start=1)
+    ]
+    entry_ns: SimpleNamespace = make_entry("".join(summary_parts))
+
+    entry: Entry = typing.cast("Entry", entry_ns)
+    rendered: str = replace_tags_in_text_message(entry)
+
+    for timestamp_tag in TIMESTAMP_FORMATS:
+        assert timestamp_tag in rendered
+
+
+@patch("discord_rss_bot.custom_message.get_embed")
+@patch("discord_rss_bot.custom_message.get_reader")
+def test_replace_tags_in_embed_preserves_timestamp_tags(
+    mock_get_reader: MagicMock,
+    mock_get_embed: MagicMock,
+) -> None:
+    mock_get_reader.return_value = MagicMock()
+    mock_get_embed.return_value = CustomEmbed(description="{{entry_summary}}")
+    summary_parts: list[str] = [
+        f"<p>Format {index}: ({timestamp_tag.replace('<', '&lt;').replace('>', '&gt;')})</p>"
+        for index, timestamp_tag in enumerate(TIMESTAMP_FORMATS, start=1)
+    ]
+    entry_ns: SimpleNamespace = make_entry("".join(summary_parts))
+
+    entry: Entry = typing.cast("Entry", entry_ns)
+
+    embed: CustomEmbed = replace_tags_in_embed(entry_ns.feed, entry)
+
+    for timestamp_tag in TIMESTAMP_FORMATS:
+        assert timestamp_tag in embed.description