Unescape HTML entities in summary and content before markdown conversion

This commit is contained in:
Joakim Hellsén 2025-12-08 17:47:45 +01:00
commit 9378dac0fa
No known key found for this signature in database

View file

@@ -1,5 +1,6 @@
from __future__ import annotations
import html
import json
import logging
from dataclasses import dataclass
@@ -68,6 +69,10 @@ def replace_tags_in_text_message(entry: Entry) -> str:
first_image: str = get_first_image(summary, content)
# Unescape HTML entities (e.g., &lt;h1&gt; becomes <h1>) before converting to markdown
summary = html.unescape(summary)
content = html.unescape(content)
summary = markdownify(
html=summary,
strip=["img", "table", "td", "tr", "tbody", "thead"],
@@ -199,6 +204,10 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed:
first_image: str = get_first_image(summary, content)
# Unescape HTML entities (e.g., &lt;h1&gt; becomes <h1>) before converting to markdown
summary = html.unescape(summary)
content = html.unescape(content)
summary = markdownify(
html=summary,
strip=["img", "table", "td", "tr", "tbody", "thead"],