From 9378dac0fa84db3cb23f1222c691cd89543a60ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Hells=C3=A9n?= Date: Mon, 8 Dec 2025 17:47:45 +0100 Subject: [PATCH] Unescape HTML entities in summary and content before markdown conversion --- discord_rss_bot/custom_message.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/discord_rss_bot/custom_message.py b/discord_rss_bot/custom_message.py index 99a7e11..f6ff0f5 100644 --- a/discord_rss_bot/custom_message.py +++ b/discord_rss_bot/custom_message.py @@ -1,5 +1,6 @@ from __future__ import annotations +import html import json import logging from dataclasses import dataclass @@ -68,6 +69,10 @@ def replace_tags_in_text_message(entry: Entry) -> str: first_image: str = get_first_image(summary, content) + # Unescape HTML entities (e.g., &lt;h1&gt; becomes <h1>) before converting to markdown + summary = html.unescape(summary) + content = html.unescape(content) + summary = markdownify( html=summary, strip=["img", "table", "td", "tr", "tbody", "thead"], @@ -199,6 +204,10 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed: first_image: str = get_first_image(summary, content) + # Unescape HTML entities (e.g., &lt;h1&gt; becomes <h1>) before converting to markdown + summary = html.unescape(summary) + content = html.unescape(content) + summary = markdownify( html=summary, strip=["img", "table", "td", "tr", "tbody", "thead"],