Fix markdown looking like shit
This commit is contained in:
7
.vscode/settings.json
vendored
7
.vscode/settings.json
vendored
@ -1,8 +1,3 @@
|
||||
{
|
||||
"cSpell.words": [
|
||||
"botuser",
|
||||
"Genshins",
|
||||
"levelname",
|
||||
"pipx"
|
||||
]
|
||||
"cSpell.words": ["botuser", "Genshins", "levelname", "markdownify", "pipx"]
|
||||
}
|
||||
|
@ -5,10 +5,10 @@ from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from markdownify import markdownify
|
||||
from reader import Entry, Feed, Reader, TagNotFoundError
|
||||
|
||||
from discord_rss_bot.is_url_valid import is_url_valid
|
||||
from discord_rss_bot.markdown import convert_html_to_md
|
||||
from discord_rss_bot.settings import get_reader, logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -68,8 +68,8 @@ def replace_tags_in_text_message(entry: Entry) -> str:
|
||||
|
||||
first_image: str = get_first_image(summary, content)
|
||||
|
||||
summary = convert_html_to_md(summary)
|
||||
content = convert_html_to_md(content)
|
||||
summary = markdownify(summary)
|
||||
content = markdownify(content)
|
||||
|
||||
list_of_replacements = [
|
||||
{"{{feed_author}}": feed.author},
|
||||
@ -96,7 +96,7 @@ def replace_tags_in_text_message(entry: Entry) -> str:
|
||||
{"{{entry_read_modified}}": entry.read_modified},
|
||||
{"{{entry_summary}}": summary},
|
||||
{"{{entry_summary_raw}}": entry.summary or ""},
|
||||
{"{{entry_text}}": content or summary},
|
||||
{"{{entry_text}}": summary or content},
|
||||
{"{{entry_title}}": entry.title},
|
||||
{"{{entry_updated}}": entry.updated},
|
||||
{"{{image_1}}": first_image},
|
||||
@ -106,7 +106,8 @@ def replace_tags_in_text_message(entry: Entry) -> str:
|
||||
for template, replace_with in replacement.items():
|
||||
custom_message = try_to_replace(custom_message, template, replace_with)
|
||||
|
||||
return custom_message.replace("\\n", "\n")
|
||||
our_custom_message = custom_message.replace("\\n", "\n")
|
||||
return our_custom_message # noqa: RET504
|
||||
|
||||
|
||||
def get_first_image(summary: str | None, content: str | None) -> str:
|
||||
@ -163,8 +164,8 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed:
|
||||
|
||||
first_image: str = get_first_image(summary, content)
|
||||
|
||||
summary = convert_html_to_md(summary)
|
||||
content = convert_html_to_md(content)
|
||||
summary = markdownify(summary)
|
||||
content = markdownify(content)
|
||||
|
||||
feed_added: str = feed.added.strftime("%Y-%m-%d %H:%M:%S") if feed.added else "Never"
|
||||
feed_last_updated: str = feed.last_updated.strftime("%Y-%m-%d %H:%M:%S") if feed.last_updated else "Never"
|
||||
@ -198,7 +199,7 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed:
|
||||
{"{{entry_read_modified}}": entry_read_modified or ""},
|
||||
{"{{entry_summary}}": summary or ""},
|
||||
{"{{entry_summary_raw}}": entry.summary or ""},
|
||||
{"{{entry_text}}": content or summary or ""},
|
||||
{"{{entry_text}}": summary or content or ""},
|
||||
{"{{entry_title}}": entry.title or ""},
|
||||
{"{{entry_updated}}": entry_updated or ""},
|
||||
{"{{image_1}}": first_image or ""},
|
||||
|
@ -2,12 +2,11 @@ from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import pprint
|
||||
import textwrap
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from discord_webhook import DiscordEmbed, DiscordWebhook
|
||||
from fastapi import HTTPException
|
||||
from reader import Entry, Feed, FeedExistsError, Reader, TagNotFoundError
|
||||
from reader import Entry, EntryNotFoundError, Feed, FeedExistsError, Reader, StorageError, TagNotFoundError
|
||||
|
||||
from discord_rss_bot import custom_message
|
||||
from discord_rss_bot.filter.blacklist import should_be_skipped
|
||||
@ -43,8 +42,6 @@ def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) ->
|
||||
# This has to be a string for some reason so don't change it to "not custom_message.get_custom_message()"
|
||||
if custom_message.get_custom_message(reader, entry.feed) != "": # noqa: PLC1901
|
||||
webhook_message = custom_message.replace_tags_in_text_message(entry=entry)
|
||||
else:
|
||||
webhook_message: str = str(default_custom_message)
|
||||
|
||||
if not webhook_message:
|
||||
webhook_message = "No message found."
|
||||
@ -62,6 +59,38 @@ def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) ->
|
||||
return None
|
||||
|
||||
|
||||
def set_description(custom_embed: custom_message.CustomEmbed, discord_embed: DiscordEmbed) -> None:
    """Set the description of the embed.

    The text is read from ``custom_embed.description``. Text longer than the cap is cut and
    suffixed with "..." so that the final string, suffix included, never exceeds the cap.
    Nothing is set on the Discord embed when the description is empty or missing.

    Args:
        custom_embed (custom_message.CustomEmbed): The custom embed to get the description from.
        discord_embed (DiscordEmbed): The Discord embed to set the description on.
    """
    # Deliberately conservative cap, kept below Discord's documented embed description limit.
    max_description_length: int = 2000
    truncation_suffix: str = "..."

    # Treat a missing description like an empty one instead of failing on len(None).
    embed_description: str = custom_embed.description or ""
    if not embed_description:
        return

    if len(embed_description) > max_description_length:
        # Reserve room for the suffix so the result stays within the cap.
        keep: int = max_description_length - len(truncation_suffix)
        embed_description = embed_description[:keep] + truncation_suffix

    discord_embed.set_description(embed_description)
|
||||
|
||||
|
||||
def set_title(custom_embed: custom_message.CustomEmbed, discord_embed: DiscordEmbed) -> None:
    """Set the title of the embed.

    The text is read from ``custom_embed.title``. Text longer than the cap is cut and suffixed
    with "..." so that the final string, suffix included, never exceeds the cap. Nothing is set
    on the Discord embed when the title is empty or missing.

    Args:
        custom_embed: The custom embed to get the title from.
        discord_embed: The Discord embed to set the title on.
    """
    # Deliberately conservative cap, kept below Discord's documented embed title limit (256).
    max_title_length: int = 200
    truncation_suffix: str = "..."

    # Treat a missing title like an empty one instead of failing on len(None).
    embed_title: str = custom_embed.title or ""
    if not embed_title:
        return

    if len(embed_title) > max_title_length:
        # Reserve room for the suffix so the result stays within the cap.
        keep: int = max_title_length - len(truncation_suffix)
        embed_title = embed_title[:keep] + truncation_suffix

    discord_embed.set_title(embed_title)
|
||||
|
||||
|
||||
def create_embed_webhook(webhook_url: str, entry: Entry) -> DiscordWebhook:
|
||||
"""Create a webhook with an embed.
|
||||
|
||||
@ -80,11 +109,8 @@ def create_embed_webhook(webhook_url: str, entry: Entry) -> DiscordWebhook:
|
||||
|
||||
discord_embed: DiscordEmbed = DiscordEmbed()
|
||||
|
||||
embed_title: str = textwrap.shorten(custom_embed.title, width=200, placeholder="...")
|
||||
discord_embed.set_title(embed_title) if embed_title else None
|
||||
|
||||
webhook_message: str = textwrap.shorten(custom_embed.description, width=2000, placeholder="...")
|
||||
discord_embed.set_description(webhook_message) if webhook_message else None
|
||||
set_description(custom_embed=custom_embed, discord_embed=discord_embed)
|
||||
set_title(custom_embed=custom_embed, discord_embed=discord_embed)
|
||||
|
||||
custom_embed_author_url: str | None = custom_embed.author_url
|
||||
if not is_url_valid(custom_embed_author_url):
|
||||
@ -158,7 +184,14 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non
|
||||
continue
|
||||
|
||||
# Set the webhook to read, so we don't send it again.
|
||||
reader.set_entry_read(entry, True)
|
||||
try:
|
||||
reader.set_entry_read(entry, True)
|
||||
except EntryNotFoundError as e:
|
||||
logger.error("Error setting entry to read: %s", e)
|
||||
continue
|
||||
except StorageError as e:
|
||||
logger.error("Error setting entry to read: %s", e)
|
||||
continue
|
||||
|
||||
# Get the webhook URL for the entry. If it is None, we will continue to the next entry.
|
||||
webhook_url: str = str(reader.get_tag(entry.feed_url, "webhook", ""))
|
||||
@ -175,8 +208,13 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non
|
||||
else:
|
||||
webhook_message: str = str(default_custom_message)
|
||||
|
||||
# Truncate the webhook_message to 2000 characters
|
||||
webhook_message = textwrap.shorten(webhook_message, width=2000, placeholder="...")
|
||||
# Its actually 4096, but we will use 4000 to be safe.
|
||||
max_content_length: int = 4000
|
||||
webhook_message = (
|
||||
webhook_message[:max_content_length] + "..."
|
||||
if len(webhook_message) > max_content_length
|
||||
else webhook_message
|
||||
)
|
||||
|
||||
# Create the webhook.
|
||||
webhook: DiscordWebhook = DiscordWebhook(url=webhook_url, content=webhook_message, rate_limit_retry=True)
|
||||
|
@ -17,6 +17,7 @@ from fastapi.responses import HTMLResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from httpx import Response
|
||||
from markdownify import markdownify
|
||||
from reader import Entry, Feed, FeedNotFoundError, Reader, TagNotFoundError
|
||||
from reader.types import JSONType
|
||||
from starlette.responses import RedirectResponse
|
||||
@ -36,7 +37,6 @@ from discord_rss_bot.custom_message import (
|
||||
save_embed,
|
||||
)
|
||||
from discord_rss_bot.feeds import create_feed, send_entry_to_discord, send_to_discord
|
||||
from discord_rss_bot.markdown import convert_html_to_md
|
||||
from discord_rss_bot.missing_tags import add_missing_tags
|
||||
from discord_rss_bot.search import create_html_for_search_results
|
||||
from discord_rss_bot.settings import get_reader
|
||||
@ -73,7 +73,7 @@ templates: Jinja2Templates = Jinja2Templates(directory="discord_rss_bot/template
|
||||
templates.env.filters["encode_url"] = encode_url
|
||||
templates.env.filters["entry_is_whitelisted"] = entry_is_whitelisted
|
||||
templates.env.filters["entry_is_blacklisted"] = entry_is_blacklisted
|
||||
templates.env.filters["discord_markdown"] = convert_html_to_md
|
||||
templates.env.filters["discord_markdown"] = markdownify
|
||||
|
||||
|
||||
@app.post("/add_webhook")
|
||||
|
@ -1,53 +0,0 @@
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def convert_html_to_md(html: str) -> str:
    """Convert HTML to markdown.

    Inline formatting tags are rewritten to their markdown equivalents, images are dropped,
    links become ``[text](href)``, ``<br>`` and paragraph ends become newlines, and every
    remaining tag is replaced by its text.

    Args:
        html: The HTML to convert.

    Returns:
        Our markdown, stripped of leading and trailing whitespace. Falsy input is returned unchanged.
    """
    if not html:
        return html

    soup: BeautifulSoup = BeautifulSoup(html, features="lxml")

    for bold in soup.find_all("b") + soup.find_all("strong"):
        bold.replace_with(f"**{bold.text}**")

    for italic in soup.find_all("i") + soup.find_all("em"):
        italic.replace_with(f"*{italic.text}*")

    for blockquote in soup.find_all("blockquote") + soup.find_all("q"):
        blockquote.replace_with(f">>> {blockquote.text}")

    for code in soup.find_all("code") + soup.find_all("pre"):
        code.replace_with(f"`{code.text}`")

    for image in soup.find_all("img"):
        image.decompose()

    for link in soup.find_all("a") + soup.find_all("link"):
        link_text: str = link.get_text()

        # A link without visible text has nothing to show, so drop it entirely.
        if not link_text.strip():
            link.decompose()
            continue

        href = link.get("href")
        if not href:
            # No target to point at: keep the text rather than emitting "[text](None)".
            link.replace_with(link_text)
            continue

        # Strip the scheme from the visible text only; the target keeps the full URL.
        link_text = link_text.replace("http://", "").replace("https://", "")
        link.replace_with(f"[{link_text}]({href})")

    for strikethrough in soup.find_all("s") + soup.find_all("del") + soup.find_all("strike"):
        strikethrough.replace_with(f"~~{strikethrough.text}~~")

    for br in soup.find_all("br"):
        br.replace_with("\n")

    # Re-parse with a newline after each paragraph so consecutive paragraphs end up on separate lines.
    clean_soup: BeautifulSoup = BeautifulSoup(str(soup).replace("</p>", "</p>\n"), features="lxml")

    # Remove all other tags
    for tag in clean_soup.find_all(True):
        tag.replace_with(tag.text)

    return clean_soup.text.strip()
|
17
poetry.lock
generated
17
poetry.lock
generated
@ -675,6 +675,21 @@ html5 = ["html5lib"]
|
||||
htmlsoup = ["BeautifulSoup4"]
|
||||
source = ["Cython (==0.29.37)"]
|
||||
|
||||
[[package]]
|
||||
name = "markdownify"
|
||||
version = "0.12.1"
|
||||
description = "Convert HTML to markdown."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "markdownify-0.12.1-py3-none-any.whl", hash = "sha256:a3805abd8166dbb7b27783c5599d91f54f10d79894b2621404d85b333c7ce561"},
|
||||
{file = "markdownify-0.12.1.tar.gz", hash = "sha256:1fb08c618b30e0ee7a31a39b998f44a18fb28ab254f55f4af06b6d35a2179e27"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
beautifulsoup4 = ">=4.9,<5"
|
||||
six = ">=1.15,<2"
|
||||
|
||||
[[package]]
|
||||
name = "markupsafe"
|
||||
version = "2.1.5"
|
||||
@ -1673,4 +1688,4 @@ watchdog = ["watchdog (>=2.3)"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "1a20eeb21e0dad90c4116b164c8d7a796e53b2bfad916ed494970ee84ee2de52"
|
||||
content-hash = "80dda8c54105faacac42a5eb722aa0e985bf42443bf2b8d32d2bda90e4fb5756"
|
||||
|
@ -19,6 +19,7 @@ python-multipart = "^0.0.9"
|
||||
reader = "^3.12"
|
||||
tomlkit = "^0.12.0"
|
||||
uvicorn = { extras = ["standard"], version = "^0.29.0" }
|
||||
markdownify = "^0.12.1"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
djlint = "^1.34.1"
|
||||
|
@ -1,69 +0,0 @@
|
||||
from discord_rss_bot.markdown import convert_html_to_md
|
||||
|
||||
|
||||
def test_convert_to_md() -> None:
    """Check convert_html_to_md on single tags, combined tags, whitespace handling and a real feed entry."""
    # Real entry taken from a feed: a linked headline, a short description and a linked thumbnail.
    nvidia_entry: str = (
        '<p><a href="https://www.nvidia.com/en-us/geforce/news/jan-2023-nvidia-broadcast-update/">'
        "NVIDIA Broadcast 1.4 Adds Eye Contact and Vignette Effects With Virtual Background Enhancements</a></p>"
        '<div class="field field-name-field-short-description field-type-text-long field-label-hidden">'
        '<div class="field-items"><div class="field-item even">Plus new options to mirror your camera and take a selfie.</div>'  # noqa: E501
        '</div></div><div class="field field-name-field-thumbnail-image field-type-image field-label-hidden">'
        '<div class="field-items"><div class="field-item even"><a href="https://www.nvidia.com/en-us/geforce/news/jan-2023-nvidia-broadcast-update/">'
        '<img width="210" src="https://www.nvidia.com/content/dam/en-zz/Solutions/geforce/news/jan-2023-nvidia-broadcast-update/broadcast-owned-asset-625x330-newsfeed.png"'
        ' title="NVIDIA Broadcast 1.4 Adds Eye Contact and Vignette Effects With Virtual Background Enhancements" '
        'alt="NVIDIA Broadcast 1.4 Adds Eye Contact and Vignette Effects With Virtual Background Enhancements"></a></div></div></div>'  # noqa: E501
    )
    nvidia_markdown: str = (
        "[NVIDIA Broadcast 1.4 Adds Eye Contact and Vignette Effects With Virtual Background Enhancements](https://www.nvidia.com/en-us/geforce/news/jan-2023-nvidia-broadcast-update/)\n"
        "Plus new options to mirror your camera and take a selfie."
    )

    # (input HTML, expected markdown), checked in order.
    expectations: list[tuple[str, str]] = [
        # Bold
        ("<b>bold</b>", "**bold**"),
        # Italic
        ("<i>italic</i>", "*italic*"),
        # Blockquote
        ("<blockquote>blockquote</blockquote>", ">>> blockquote"),
        # Code
        ("<code>code</code>", "`code`"),
        # Strikethrough
        ("<s>strikethrough</s>", "~~strikethrough~~"),
        # Link
        ('<a href="https://example.com">link</a>', "[link](https://example.com)"),
        # Pre code
        ("<pre><code>pre code</code></pre>", "``pre code``"),
        # Strong
        ("<strong>strong</strong>", "**strong**"),
        # Multiple tags
        (
            '<b>bold</b> <i>italic</i> <a href="https://example.com">link</a> <code>code</code> <s>strikethrough</s>',
            "**bold** *italic* [link](https://example.com) `code` ~~strikethrough~~",
        ),
        # Removing all other tags
        ("<p>paragraph</p>", "paragraph"),
        ("<p>paragraph</p><p>paragraph</p>", "paragraph\nparagraph"),
        # <br> tags
        ("<p>paragraph<br>paragraph</p>", "paragraph\nparagraph"),
        # Removing trailing newline
        ("paragraph ", "paragraph"),
        # Removing leading and trailing whitespace
        (" paragraph ", "paragraph"),
        # Removing leading and trailing whitespace and trailing newline
        (" paragraph\n \n", "paragraph"),
        # Real entry
        (nvidia_entry, nvidia_markdown),
    ]

    for html, expected_markdown in expectations:
        assert convert_html_to_md(html) == expected_markdown
|
Reference in New Issue
Block a user