Preserve Discord timestamp tags in message
All checks were successful
Test and build Docker image / docker (push) Successful in 1m30s

This commit is contained in:
Joakim Hellsén 2026-03-14 05:26:48 +01:00
commit b19927af0f
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
2 changed files with 213 additions and 48 deletions

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import html import html
import json import json
import logging import logging
import re
from dataclasses import dataclass from dataclasses import dataclass
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -18,6 +19,8 @@ from discord_rss_bot.settings import get_reader
logger: logging.Logger = logging.getLogger(__name__) logger: logging.Logger = logging.getLogger(__name__)
DISCORD_TIMESTAMP_TAG_RE: re.Pattern[str] = re.compile(r"<t:\d+(?::[tTdDfFrRsS])?>")
@dataclass(slots=True) @dataclass(slots=True)
class CustomEmbed: class CustomEmbed:
@ -51,6 +54,68 @@ def try_to_replace(custom_message: str, template: str, replace_with: str) -> str
return custom_message return custom_message
def _preserve_discord_timestamp_tags(text: str) -> tuple[str, dict[str, str]]:
    """Swap every Discord timestamp tag for a plain-text placeholder.

    Each match of ``DISCORD_TIMESTAMP_TAG_RE`` gets its own numbered placeholder
    (numbered in order of appearance, starting at 0), even when the same tag
    occurs more than once.

    Args:
        text: The text to replace tags in.

    Returns:
        A tuple of the text with tags replaced by placeholders, and a mapping of
        each placeholder to the original tag it stands for.
    """
    tag_by_placeholder: dict[str, str] = {}
    pieces: list[str] = []
    cursor: int = 0

    for index, found in enumerate(DISCORD_TIMESTAMP_TAG_RE.finditer(text)):
        token: str = f"DISCORDTIMESTAMPPLACEHOLDER{index}"
        tag_by_placeholder[token] = found.group(0)
        # Copy the untouched text before this tag, then the placeholder itself.
        pieces.append(text[cursor : found.start()])
        pieces.append(token)
        cursor = found.end()

    # Whatever follows the last tag (or the whole text when nothing matched).
    pieces.append(text[cursor:])
    return "".join(pieces), tag_by_placeholder
def _restore_discord_timestamp_tags(text: str, replacements: dict[str, str]) -> str:
"""Restore preserved Discord timestamp tags after markdown conversion.
Args:
text: The text to restore tags in.
replacements: A mapping of placeholders to original Discord timestamp tags.
Returns:
The text with placeholders replaced by the original Discord timestamp tags.
"""
for placeholder, original_value in replacements.items():
text = text.replace(placeholder, original_value)
return text
def format_entry_html_for_discord(text: str) -> str:
    """Convert entry HTML to Discord-friendly markdown while preserving Discord timestamp tags.

    The text is HTML-unescaped first (e.g. ``&lt;h1&gt;`` becomes ``<h1>``), Discord
    timestamp tags are swapped for placeholders so they pass through the markdown
    conversion untouched, and the placeholders are swapped back at the end.

    Args:
        text: The HTML text to format.

    Returns:
        The formatted text with Discord timestamp tags preserved, or an empty
        string when ``text`` is empty.
    """
    if not text:
        return ""

    unescaped_text: str = html.unescape(text)
    protected_text, replacements = _preserve_discord_timestamp_tags(unescaped_text)

    formatted_text: str = markdownify(
        html=protected_text,
        strip=["img", "table", "td", "tr", "tbody", "thead"],
        escape_misc=False,
        heading_style="ATX",
    )

    # Shorten link text that is just the URL. The "https://www." prefix must be
    # stripped before the shorter "https://" prefix; in the opposite order the
    # "www." variant can never match and is left in the link text.
    formatted_text = formatted_text.replace("[https://www.", "[").replace("[https://", "[")

    return _restore_discord_timestamp_tags(formatted_text, replacements)
def replace_tags_in_text_message(entry: Entry) -> str: def replace_tags_in_text_message(entry: Entry) -> str:
"""Replace tags in custom_message. """Replace tags in custom_message.
@ -73,30 +138,8 @@ def replace_tags_in_text_message(entry: Entry) -> str:
first_image: str = get_first_image(summary, content) first_image: str = get_first_image(summary, content)
# Unescape HTML entities (e.g., &lt;h1&gt; becomes <h1>) before converting to markdown summary = format_entry_html_for_discord(summary)
summary = html.unescape(summary) content = format_entry_html_for_discord(content)
content = html.unescape(content)
summary = markdownify(
html=summary,
strip=["img", "table", "td", "tr", "tbody", "thead"],
escape_misc=False,
heading_style="ATX",
)
content = markdownify(
html=content,
strip=["img", "table", "td", "tr", "tbody", "thead"],
escape_misc=False,
heading_style="ATX",
)
if "[https://" in content or "[https://www." in content:
content = content.replace("[https://", "[")
content = content.replace("[https://www.", "[")
if "[https://" in summary or "[https://www." in summary:
summary = summary.replace("[https://", "[")
summary = summary.replace("[https://www.", "[")
feed_added: str = feed.added.strftime("%Y-%m-%d %H:%M:%S") if feed.added else "Never" feed_added: str = feed.added.strftime("%Y-%m-%d %H:%M:%S") if feed.added else "Never"
feed_last_exception: str = feed.last_exception.value_str if feed.last_exception else "" feed_last_exception: str = feed.last_exception.value_str if feed.last_exception else ""
@ -208,30 +251,8 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed:
first_image: str = get_first_image(summary, content) first_image: str = get_first_image(summary, content)
# Unescape HTML entities (e.g., &lt;h1&gt; becomes <h1>) before converting to markdown summary = format_entry_html_for_discord(summary)
summary = html.unescape(summary) content = format_entry_html_for_discord(content)
content = html.unescape(content)
summary = markdownify(
html=summary,
strip=["img", "table", "td", "tr", "tbody", "thead"],
escape_misc=False,
heading_style="ATX",
)
content = markdownify(
html=content,
strip=["img", "table", "td", "tr", "tbody", "thead"],
escape_misc=False,
heading_style="ATX",
)
if "[https://" in content or "[https://www." in content:
content = content.replace("[https://", "[")
content = content.replace("[https://www.", "[")
if "[https://" in summary or "[https://www." in summary:
summary = summary.replace("[https://", "[")
summary = summary.replace("[https://www.", "[")
feed_added: str = feed.added.strftime("%Y-%m-%d %H:%M:%S") if feed.added else "Never" feed_added: str = feed.added.strftime("%Y-%m-%d %H:%M:%S") if feed.added else "Never"
feed_last_updated: str = feed.last_updated.strftime("%Y-%m-%d %H:%M:%S") if feed.last_updated else "Never" feed_last_updated: str = feed.last_updated.strftime("%Y-%m-%d %H:%M:%S") if feed.last_updated else "Never"

View file

@ -0,0 +1,144 @@
from __future__ import annotations
import typing
from types import SimpleNamespace
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from discord_rss_bot.custom_message import CustomEmbed
from discord_rss_bot.custom_message import format_entry_html_for_discord
from discord_rss_bot.custom_message import replace_tags_in_embed
from discord_rss_bot.custom_message import replace_tags_in_text_message
if typing.TYPE_CHECKING:
from reader import Entry
# https://docs.discord.com/developers/reference#message-formatting
# One tag without a style suffix, followed by one tag for each style letter.
TIMESTAMP_FORMATS: tuple[str, ...] = (
    "<t:1773461490>",
    *(f"<t:1773461490:{style}>" for style in "FfDdtTRsS"),
)
def make_feed() -> SimpleNamespace:
    """Build a lightweight stand-in for a feed object with fixed example values."""
    feed_fields: dict[str, object] = {
        "added": None,
        "author": "Feed Author",
        "last_exception": None,
        "last_updated": None,
        "link": "https://example.com/feed",
        "subtitle": "",
        "title": "Example Feed",
        "updated": None,
        "updates_enabled": True,
        "url": "https://example.com/feed.xml",
        "user_title": "",
        "version": "atom10",
    }
    return SimpleNamespace(**feed_fields)
def make_entry(summary: str) -> SimpleNamespace:
    """Build a lightweight stand-in for an entry object.

    Args:
        summary: The value stored in the entry's ``summary`` attribute.

    Returns:
        A namespace with fixed example values, no content, and a fresh feed
        from ``make_feed`` attached as ``feed``.
    """
    parent_feed: SimpleNamespace = make_feed()
    entry_fields: dict[str, object] = {
        "added": None,
        "author": "Entry Author",
        "content": [],
        "feed": parent_feed,
        "feed_url": parent_feed.url,
        "id": "entry-1",
        "important": False,
        "link": "https://example.com/entry-1",
        "published": None,
        "read": False,
        "read_modified": None,
        "summary": summary,
        "title": "Entry Title",
        "updated": None,
    }
    return SimpleNamespace(**entry_fields)
@pytest.mark.parametrize("timestamp_tag", TIMESTAMP_FORMATS)
def test_format_entry_html_for_discord_preserves_timestamp_tags(timestamp_tag: str) -> None:
    """Each timestamp tag in TIMESTAMP_FORMATS survives formatting when it arrives HTML-escaped."""
    escaped: str = timestamp_tag.replace("<", "&lt;").replace(">", "&gt;")

    output: str = format_entry_html_for_discord(f"<p>Starts: 2026-03-13 23:30 UTC ({escaped})</p>")

    assert timestamp_tag in output
    # No internal placeholder may leak into the final text.
    assert "DISCORDTIMESTAMPPLACEHOLDER" not in output
def test_format_entry_html_for_discord_empty_text_returns_empty_string() -> None:
    """Empty input produces an empty (falsy) result."""
    assert not format_entry_html_for_discord("")
def test_format_entry_html_for_discord_cleans_markdownified_https_link_text() -> None:
    """The ``https://`` scheme is removed from link text but kept in the link target."""
    output: str = format_entry_html_for_discord("[https://example.com](https://example.com)")

    assert "[https://example.com]" not in output
    assert "[example.com](https://example.com)" in output
def test_format_entry_html_for_discord_does_not_preserve_invalid_timestamp_style() -> None:
    """A tag with an unsupported style letter is not protected and does not appear in the output."""
    bad_tag: str = "<t:1773461490:Z>"
    escaped: str = bad_tag.replace("<", "&lt;").replace(">", "&gt;")

    output: str = format_entry_html_for_discord(f"<p>Invalid style ({escaped})</p>")

    assert bad_tag not in output
@patch("discord_rss_bot.custom_message.get_custom_message")
@patch("discord_rss_bot.custom_message.get_reader")
def test_replace_tags_in_text_message_preserves_timestamp_tags(
    mock_get_reader: MagicMock,
    mock_get_custom_message: MagicMock,
) -> None:
    """All timestamp tags in the entry summary reach the rendered text message."""
    mock_get_reader.return_value = MagicMock()
    # The custom message consists solely of the summary tag.
    mock_get_custom_message.return_value = "{{entry_summary}}"

    paragraphs: list[str] = []
    for position, tag in enumerate(TIMESTAMP_FORMATS, start=1):
        escaped_tag: str = tag.replace("<", "&lt;").replace(">", "&gt;")
        paragraphs.append(f"<p>Format {position}: ({escaped_tag})</p>")

    fake_entry: SimpleNamespace = make_entry("".join(paragraphs))
    entry: Entry = typing.cast("Entry", fake_entry)

    message: str = replace_tags_in_text_message(entry)

    missing_tags: list[str] = [tag for tag in TIMESTAMP_FORMATS if tag not in message]
    assert not missing_tags
@patch("discord_rss_bot.custom_message.get_embed")
@patch("discord_rss_bot.custom_message.get_reader")
def test_replace_tags_in_embed_preserves_timestamp_tags(
    mock_get_reader: MagicMock,
    mock_get_embed: MagicMock,
) -> None:
    """All timestamp tags in the entry summary reach the embed description."""
    mock_get_reader.return_value = MagicMock()
    # The embed description consists solely of the summary tag.
    mock_get_embed.return_value = CustomEmbed(description="{{entry_summary}}")

    paragraphs: list[str] = []
    for position, tag in enumerate(TIMESTAMP_FORMATS, start=1):
        escaped_tag: str = tag.replace("<", "&lt;").replace(">", "&gt;")
        paragraphs.append(f"<p>Format {position}: ({escaped_tag})</p>")

    fake_entry: SimpleNamespace = make_entry("".join(paragraphs))
    entry: Entry = typing.cast("Entry", fake_entry)

    result: CustomEmbed = replace_tags_in_embed(fake_entry.feed, entry)

    missing_tags: list[str] = [tag for tag in TIMESTAMP_FORMATS if tag not in result.description]
    assert not missing_tags