Fix markdown looking like shit

This commit is contained in:
2024-05-24 01:11:38 +02:00
parent 1368607e29
commit 73b171dbfd
8 changed files with 79 additions and 151 deletions

View File

@ -5,10 +5,10 @@ from dataclasses import dataclass
from typing import TYPE_CHECKING
from bs4 import BeautifulSoup
from markdownify import markdownify
from reader import Entry, Feed, Reader, TagNotFoundError
from discord_rss_bot.is_url_valid import is_url_valid
from discord_rss_bot.markdown import convert_html_to_md
from discord_rss_bot.settings import get_reader, logger
if TYPE_CHECKING:
@ -68,8 +68,8 @@ def replace_tags_in_text_message(entry: Entry) -> str:
first_image: str = get_first_image(summary, content)
summary = convert_html_to_md(summary)
content = convert_html_to_md(content)
summary = markdownify(summary)
content = markdownify(content)
list_of_replacements = [
{"{{feed_author}}": feed.author},
@ -96,7 +96,7 @@ def replace_tags_in_text_message(entry: Entry) -> str:
{"{{entry_read_modified}}": entry.read_modified},
{"{{entry_summary}}": summary},
{"{{entry_summary_raw}}": entry.summary or ""},
{"{{entry_text}}": content or summary},
{"{{entry_text}}": summary or content},
{"{{entry_title}}": entry.title},
{"{{entry_updated}}": entry.updated},
{"{{image_1}}": first_image},
@ -106,7 +106,8 @@ def replace_tags_in_text_message(entry: Entry) -> str:
for template, replace_with in replacement.items():
custom_message = try_to_replace(custom_message, template, replace_with)
return custom_message.replace("\\n", "\n")
our_custom_message = custom_message.replace("\\n", "\n")
return our_custom_message # noqa: RET504
def get_first_image(summary: str | None, content: str | None) -> str:
@ -163,8 +164,8 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed:
first_image: str = get_first_image(summary, content)
summary = convert_html_to_md(summary)
content = convert_html_to_md(content)
summary = markdownify(summary)
content = markdownify(content)
feed_added: str = feed.added.strftime("%Y-%m-%d %H:%M:%S") if feed.added else "Never"
feed_last_updated: str = feed.last_updated.strftime("%Y-%m-%d %H:%M:%S") if feed.last_updated else "Never"
@ -198,7 +199,7 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed:
{"{{entry_read_modified}}": entry_read_modified or ""},
{"{{entry_summary}}": summary or ""},
{"{{entry_summary_raw}}": entry.summary or ""},
{"{{entry_text}}": content or summary or ""},
{"{{entry_text}}": summary or content or ""},
{"{{entry_title}}": entry.title or ""},
{"{{entry_updated}}": entry_updated or ""},
{"{{image_1}}": first_image or ""},

View File

@ -2,12 +2,11 @@ from __future__ import annotations
import datetime
import pprint
import textwrap
from typing import TYPE_CHECKING
from discord_webhook import DiscordEmbed, DiscordWebhook
from fastapi import HTTPException
from reader import Entry, Feed, FeedExistsError, Reader, TagNotFoundError
from reader import Entry, EntryNotFoundError, Feed, FeedExistsError, Reader, StorageError, TagNotFoundError
from discord_rss_bot import custom_message
from discord_rss_bot.filter.blacklist import should_be_skipped
@ -43,8 +42,6 @@ def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) ->
# This has to be a string for some reason so don't change it to "not custom_message.get_custom_message()"
if custom_message.get_custom_message(reader, entry.feed) != "": # noqa: PLC1901
webhook_message = custom_message.replace_tags_in_text_message(entry=entry)
else:
webhook_message: str = str(default_custom_message)
if not webhook_message:
webhook_message = "No message found."
@ -62,6 +59,38 @@ def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) ->
return None
def set_description(custom_embed: custom_message.CustomEmbed, discord_embed: DiscordEmbed) -> None:
    """Set the description of the embed.

    The description is capped at 2000 characters. When it is longer it is cut
    and "..." is appended; the ellipsis counts towards the cap, so the value
    handed to the embed is never longer than 2000 characters. Nothing is set
    when the description is empty.

    Args:
        custom_embed (custom_message.CustomEmbed): The custom embed to get the description from.
        discord_embed (DiscordEmbed): The Discord embed to set the description on.
    """
    # Discord's documented limit for embed descriptions is higher (4096
    # characters at the time of writing); we stay well below it to be safe.
    max_description_length: int = 2000
    suffix: str = "..."

    # Treat a missing description like an empty one instead of failing on len().
    embed_description: str = custom_embed.description or ""

    if len(embed_description) > max_description_length:
        # Reserve room for the suffix so the result never exceeds the cap.
        embed_description = embed_description[: max_description_length - len(suffix)] + suffix

    if embed_description:
        discord_embed.set_description(embed_description)
def set_title(custom_embed: custom_message.CustomEmbed, discord_embed: DiscordEmbed) -> None:
    """Set the title of the embed.

    The title is capped at 200 characters. When it is longer it is cut and
    "..." is appended; the ellipsis counts towards the cap, so the value
    handed to the embed is never longer than 200 characters. Nothing is set
    when the title is empty.

    Args:
        custom_embed: The custom embed to get the title from.
        discord_embed: The Discord embed to set the title on.
    """
    # Discord's documented limit for embed titles is 256 characters; we stay
    # below it to be safe.
    max_title_length: int = 200
    suffix: str = "..."

    # Treat a missing title like an empty one instead of failing on len().
    embed_title: str = custom_embed.title or ""

    if len(embed_title) > max_title_length:
        # Reserve room for the suffix so the result never exceeds the cap.
        embed_title = embed_title[: max_title_length - len(suffix)] + suffix

    if embed_title:
        discord_embed.set_title(embed_title)
def create_embed_webhook(webhook_url: str, entry: Entry) -> DiscordWebhook:
"""Create a webhook with an embed.
@ -80,11 +109,8 @@ def create_embed_webhook(webhook_url: str, entry: Entry) -> DiscordWebhook:
discord_embed: DiscordEmbed = DiscordEmbed()
embed_title: str = textwrap.shorten(custom_embed.title, width=200, placeholder="...")
discord_embed.set_title(embed_title) if embed_title else None
webhook_message: str = textwrap.shorten(custom_embed.description, width=2000, placeholder="...")
discord_embed.set_description(webhook_message) if webhook_message else None
set_description(custom_embed=custom_embed, discord_embed=discord_embed)
set_title(custom_embed=custom_embed, discord_embed=discord_embed)
custom_embed_author_url: str | None = custom_embed.author_url
if not is_url_valid(custom_embed_author_url):
@ -158,7 +184,14 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non
continue
# Set the webhook to read, so we don't send it again.
reader.set_entry_read(entry, True)
try:
reader.set_entry_read(entry, True)
except EntryNotFoundError as e:
logger.error("Error setting entry to read: %s", e)
continue
except StorageError as e:
logger.error("Error setting entry to read: %s", e)
continue
# Get the webhook URL for the entry. If it is None, we will continue to the next entry.
webhook_url: str = str(reader.get_tag(entry.feed_url, "webhook", ""))
@ -175,8 +208,13 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non
else:
webhook_message: str = str(default_custom_message)
# Truncate the webhook_message to 2000 characters
webhook_message = textwrap.shorten(webhook_message, width=2000, placeholder="...")
# Its actually 4096, but we will use 4000 to be safe.
max_content_length: int = 4000
webhook_message = (
webhook_message[:max_content_length] + "..."
if len(webhook_message) > max_content_length
else webhook_message
)
# Create the webhook.
webhook: DiscordWebhook = DiscordWebhook(url=webhook_url, content=webhook_message, rate_limit_retry=True)

View File

@ -17,6 +17,7 @@ from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from httpx import Response
from markdownify import markdownify
from reader import Entry, Feed, FeedNotFoundError, Reader, TagNotFoundError
from reader.types import JSONType
from starlette.responses import RedirectResponse
@ -36,7 +37,6 @@ from discord_rss_bot.custom_message import (
save_embed,
)
from discord_rss_bot.feeds import create_feed, send_entry_to_discord, send_to_discord
from discord_rss_bot.markdown import convert_html_to_md
from discord_rss_bot.missing_tags import add_missing_tags
from discord_rss_bot.search import create_html_for_search_results
from discord_rss_bot.settings import get_reader
@ -73,7 +73,7 @@ templates: Jinja2Templates = Jinja2Templates(directory="discord_rss_bot/template
templates.env.filters["encode_url"] = encode_url
templates.env.filters["entry_is_whitelisted"] = entry_is_whitelisted
templates.env.filters["entry_is_blacklisted"] = entry_is_blacklisted
templates.env.filters["discord_markdown"] = convert_html_to_md
templates.env.filters["discord_markdown"] = markdownify
@app.post("/add_webhook")

View File

@ -1,53 +0,0 @@
from bs4 import BeautifulSoup
def convert_html_to_md(html: str) -> str:
    """Convert HTML to markdown.

    Known inline tags are rewritten to their markdown equivalents, images are
    dropped, links become ``[text](href)``, and every remaining tag is replaced
    by its text. The passes run in a fixed order; the order matters for nested
    tags, so keep it when editing.

    Args:
        html: The HTML to convert.

    Returns:
        Our markdown. Falsy input (e.g. an empty string) is returned unchanged.
    """
    if not html:
        return html

    soup: BeautifulSoup = BeautifulSoup(html, features="lxml")

    for bold in soup.find_all("b") + soup.find_all("strong"):
        bold.replace_with(f"**{bold.text}**")

    for italic in soup.find_all("i") + soup.find_all("em"):
        italic.replace_with(f"*{italic.text}*")

    for blockquote in soup.find_all("blockquote") + soup.find_all("q"):
        blockquote.replace_with(f">>> {blockquote.text}")

    for code in soup.find_all("code") + soup.find_all("pre"):
        code.replace_with(f"`{code.text}`")

    # Images are removed entirely; no markdown is emitted for them.
    for image in soup.find_all("img"):
        image.decompose()

    for link in soup.find_all("a") + soup.find_all("link"):
        link_text: str = link.get_text()
        href = link.get("href")

        if not link_text.strip():
            # A link without visible text has nothing to show.
            link.decompose()
        elif not href:
            # An anchor without a target (e.g. <a name="...">) would otherwise
            # render as "[text](None)"; keep only its text.
            link.replace_with(link_text)
        else:
            # Drop the scheme from the label so a bare URL reads shorter.
            link_text = link_text.replace("http://", "").replace("https://", "")
            link.replace_with(f"[{link_text}]({href})")

    for strikethrough in soup.find_all("s") + soup.find_all("del") + soup.find_all("strike"):
        strikethrough.replace_with(f"~~{strikethrough.text}~~")

    for br in soup.find_all("br"):
        br.replace_with("\n")

    # Re-parse with a newline after every closing paragraph tag so paragraphs
    # stay on separate lines once the tags themselves are stripped.
    clean_soup: BeautifulSoup = BeautifulSoup(str(soup).replace("</p>", "</p>\n"), features="lxml")

    # Remove all other tags
    for tag in clean_soup.find_all(True):
        tag.replace_with(tag.text)

    return clean_soup.text.strip()