From 873f5b3f7a7c061fad9097413aec53f88c120383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Hells=C3=A9n?= Date: Sat, 14 Jan 2023 12:54:03 +0100 Subject: [PATCH] Convert HTML in content/summary to Markdown --- discord_rss_bot/custom_message.py | 20 +++-- discord_rss_bot/feeds.py | 6 +- discord_rss_bot/main.py | 119 ++++++++++++++-------------- discord_rss_bot/settings.py | 6 +- discord_rss_bot/templates/feed.html | 6 +- poetry.lock | 18 ++++- pyproject.toml | 1 + 7 files changed, 101 insertions(+), 75 deletions(-) diff --git a/discord_rss_bot/custom_message.py b/discord_rss_bot/custom_message.py index c8f1441..1d657c0 100644 --- a/discord_rss_bot/custom_message.py +++ b/discord_rss_bot/custom_message.py @@ -1,3 +1,4 @@ +from markdownify import markdownify from reader import Entry, Feed, Reader, TagNotFoundError from discord_rss_bot.settings import get_reader @@ -41,6 +42,13 @@ def replace_tags(feed: Feed, entry: Entry) -> str: custom_reader: Reader = get_reader() custom_message: str = get_custom_message(feed=feed, custom_reader=custom_reader) + summary = "" + content = "" + if entry.summary: + summary: str = markdownify(entry.summary) + if entry.content: + content: str = markdownify(entry.content[0]["value"]) + list_of_replacements = [ {"{{feed_author}}": feed.author}, {"{{feed_added}}": feed.added}, @@ -56,21 +64,21 @@ def replace_tags(feed: Feed, entry: Entry) -> str: {"{{feed_version}}": feed.version}, {"{{entry_added}}": entry.added}, {"{{entry_author}}": entry.author}, - {"{{entry_content}}": entry.content}, + {"{{entry_content}}": content}, {"{{entry_id}}": entry.id}, {"{{entry_important}}": str(entry.important)}, {"{{entry_link}}": entry.link}, {"{{entry_published}}": entry.published}, {"{{entry_read}}": str(entry.read)}, {"{{entry_read_modified}}": entry.read_modified}, - {"{{entry_summary}}": entry.summary}, + {"{{entry_summary}}": summary}, {"{{entry_title}}": entry.title}, {"{{entry_updated}}": entry.updated}, ] for replacement in list_of_replacements: for template, replace_with in replacement.items(): - custom_message: str = try_to_replace(custom_message, template, replace_with) + custom_message = try_to_replace(custom_message, template, replace_with) print(f"custom_message: {custom_message}") return custom_message @@ -87,9 +95,9 @@ def get_custom_message(custom_reader: Reader, feed: Feed) -> str: Returns the contents from the custom_message tag. """ try: - custom_message: str = custom_reader.get_tag(feed, "custom_message") # type: ignore + custom_message: str = str(custom_reader.get_tag(feed, "custom_message")) except TagNotFoundError: - custom_message: str = "" + custom_message = "" except ValueError: - custom_message: str = "" + custom_message = "" return custom_message diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py index 985c736..81bace9 100644 --- a/discord_rss_bot/feeds.py +++ b/discord_rss_bot/feeds.py @@ -34,17 +34,17 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non if feed is None: entries: Iterable[Entry] = reader.get_entries(read=False) else: - entries: Iterable[Entry] = reader.get_entries(feed=feed, read=False) + entries = reader.get_entries(feed=feed, read=False) for entry in entries: # Set the webhook to read, so we don't send it again. reader.set_entry_read(entry, True) - webhook_url: str | None = settings.get_webhook_for_entry(reader, entry) + webhook_url: str = settings.get_webhook_for_entry(reader, entry) webhook_message: str = f"{entry.title}\n{entry.link}" - if webhook_url is None: + if not webhook_url: print(f"Error: No webhook found for feed: {entry.feed.title}") continue diff --git a/discord_rss_bot/main.py b/discord_rss_bot/main.py index c351501..ec71443 100644 --- a/discord_rss_bot/main.py +++ b/discord_rss_bot/main.py @@ -1,6 +1,6 @@ import urllib.parse from datetime import datetime -from typing import Any, Iterable +from typing import Iterable import uvicorn from apscheduler.schedulers.background import BackgroundScheduler @@ -8,6 +8,7 @@ from fastapi import FastAPI, Form, Request from fastapi.responses import HTMLResponse from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates +from markdownify import markdownify from reader import Entry, EntryCounts, EntrySearchCounts, EntrySearchResult, Feed, FeedCounts, Reader, TagNotFoundError from starlette.responses import RedirectResponse @@ -27,13 +28,14 @@ reader: Reader = get_reader() # Add the filters to the Jinja2 environment so they can be used in html templates. -templates.env.filters["encode_url"] = encode_url # type: ignore -templates.env.filters["entry_is_whitelisted"] = entry_is_whitelisted # type: ignore -templates.env.filters["entry_is_blacklisted"] = entry_is_blacklisted # type: ignore +templates.env.filters["encode_url"] = encode_url +templates.env.filters["entry_is_whitelisted"] = entry_is_whitelisted +templates.env.filters["entry_is_blacklisted"] = entry_is_blacklisted +templates.env.filters["discord_markdown"] = markdownify @app.post("/add_webhook") -async def add_webhook(webhook_name: str = Form(), webhook_url: str = Form()): +async def add_webhook(webhook_name=Form(), webhook_url=Form()): """ Add a feed to the database. @@ -69,7 +71,7 @@ async def add_webhook(webhook_name: str = Form(), webhook_url: str = Form()): @app.post("/delete_webhook") -async def delete_webhook(webhook_url: str = Form()): +async def delete_webhook(webhook_url=Form()): """ Delete a webhook from the database. @@ -103,7 +105,7 @@ async def delete_webhook(webhook_url: str = Form()): @app.post("/add") -async def create_feed(feed_url: str = Form(), webhook_dropdown: str = Form()): +async def create_feed(feed_url=Form(), webhook_dropdown=Form()): """ Add a feed to the database. @@ -124,19 +126,19 @@ async def create_feed(feed_url: str = Form(), webhook_dropdown: str = Form()): # Mark every entry as read, so we don't send all the old entries to Discord. entries: Iterable[Entry] = reader.get_entries(feed=clean_feed_url, read=False) for entry in entries: - reader.set_entry_read(entry, True) # type: ignore + reader.set_entry_read(entry, True) try: hooks = reader.get_tag((), "webhooks") except TagNotFoundError: hooks = [] - webhook_url = "" + webhook_url: str = "" if hooks: # Get the webhook URL from the dropdown. for hook in hooks: if hook["name"] == webhook_dropdown: # type: ignore - webhook_url: str = hook["url"] # type: ignore + webhook_url = hook["url"] # type: ignore break if not webhook_url: @@ -152,7 +154,7 @@ async def create_feed(feed_url: str = Form(), webhook_dropdown: str = Form()): @app.post("/pause") -async def pause_feed(feed_url: str = Form()): +async def pause_feed(feed_url=Form()): """Pause a feed. Args: @@ -171,7 +173,7 @@ async def pause_feed(feed_url: str = Form()): @app.post("/unpause") -async def unpause_feed(feed_url: str = Form()): +async def unpause_feed(feed_url=Form()): """Unpause a feed. Args: @@ -191,10 +193,10 @@ async def unpause_feed(feed_url: str = Form()): @app.post("/whitelist") async def set_whitelist( - whitelist_title: str = Form(None), - whitelist_summary: str = Form(None), - whitelist_content: str = Form(None), - feed_url: str = Form(), + whitelist_title=Form(None), + whitelist_summary=Form(None), + whitelist_content=Form(None), + feed_url=Form(), ): """Set what the whitelist should be sent, if you have this set only words in the whitelist will be sent. @@ -208,20 +210,20 @@ async def set_whitelist( Redirect back to the feed page. """ if whitelist_title: - reader.set_tag(feed_url, "whitelist_title", whitelist_title) # type: ignore + reader.set_tag(feed_url, "whitelist_title", whitelist_title) if whitelist_summary: - reader.set_tag(feed_url, "whitelist_summary", whitelist_summary) # type: ignore + reader.set_tag(feed_url, "whitelist_summary", whitelist_summary) if whitelist_content: - reader.set_tag(feed_url, "whitelist_content", whitelist_content) # type: ignore + reader.set_tag(feed_url, "whitelist_content", whitelist_content) # Clean URL is used to redirect to the feed page. - clean_url: str = urllib.parse.quote(feed_url) + clean_url = urllib.parse.quote(feed_url) return RedirectResponse(url=f"/feed/?feed_url={clean_url}", status_code=303) @app.get("/whitelist", response_class=HTMLResponse) -async def get_whitelist(feed_url: str, request: Request): +async def get_whitelist(feed_url, request: Request): """Get the whitelist. Args: @@ -248,15 +250,15 @@ async def get_whitelist(feed_url: str, request: Request): "whitelist_summary": whitelist_summary, "whitelist_content": whitelist_content, } - return templates.TemplateResponse("whitelist.html", context) # type: ignore + return templates.TemplateResponse("whitelist.html", context) @app.post("/blacklist") async def set_blacklist( - blacklist_title: str = Form(None), - blacklist_summary: str = Form(None), - blacklist_content: str = Form(None), - feed_url: str = Form(), + blacklist_title=Form(None), + blacklist_summary=Form(None), + blacklist_content=Form(None), + feed_url=Form(), ): """Set the blacklist, if this is set we will check if words are in the title, summary or content and then don't send that entry. @@ -273,20 +275,20 @@ async def set_blacklist( # Add the blacklist to the feed. if blacklist_title: - reader.set_tag(feed_url, "blacklist_title", blacklist_title) # type: ignore + reader.set_tag(feed_url, "blacklist_title", blacklist_title) if blacklist_summary: - reader.set_tag(feed_url, "blacklist_summary", blacklist_summary) # type: ignore + reader.set_tag(feed_url, "blacklist_summary", blacklist_summary) if blacklist_content: - reader.set_tag(feed_url, "blacklist_content", blacklist_content) # type: ignore + reader.set_tag(feed_url, "blacklist_content", blacklist_content) # Clean URL is used to redirect to the feed page. - clean_url: str = urllib.parse.quote(feed_url) + clean_url = urllib.parse.quote(feed_url) return RedirectResponse(url=f"/feed/?feed_url={clean_url}", status_code=303) @app.get("/blacklist", response_class=HTMLResponse) -async def get_blacklist(feed_url: str, request: Request): +async def get_blacklist(feed_url, request: Request): # Make feed_url a valid URL. url: str = urllib.parse.unquote(feed_url) @@ -304,11 +306,11 @@ async def get_blacklist(feed_url: str, request: Request): "blacklist_summary": blacklist_summary, "blacklist_content": blacklist_content, } - return templates.TemplateResponse("blacklist.html", context) # type: ignore + return templates.TemplateResponse("blacklist.html", context) @app.post("/custom") -async def set_custom(custom_message: str = Form(""), feed_url: str = Form()): +async def set_custom(custom_message=Form(""), feed_url=Form()): """ Set the custom message, this is used when sending the message. @@ -335,7 +337,7 @@ async def set_custom(custom_message: str = Form(""), feed_url: str = Form()): @app.get("/custom", response_class=HTMLResponse) -async def get_custom(feed_url: str, request: Request): +async def get_custom(feed_url, request: Request): """Get the custom message. This is used when sending the message to Discord. Args: @@ -355,12 +357,12 @@ async def get_custom(feed_url: str, request: Request): custom_message: str = get_custom_message(reader, feed) # Get the first entry, this is used to show the user what the custom message will look like. - first_entry: Entry = reader.get_entries(feed=feed, limit=1) - for entry in first_entry: - first_entry = entry + entries: Iterable[Entry] = reader.get_entries(feed=feed, limit=1) + for entry in entries: + first_entry: Entry = entry context = {"request": request, "feed": feed, "custom_message": custom_message, "entry": first_entry} - return templates.TemplateResponse("custom.html", context) # type: ignore + return templates.TemplateResponse("custom.html", context) @app.get("/add", response_class=HTMLResponse) @@ -374,12 +376,12 @@ def get_add(request: Request): Returns: HTMLResponse: The HTML response. """ - context = make_context_index(request) # type: ignore - return templates.TemplateResponse("add.html", context) # type: ignore + context = make_context_index(request) + return templates.TemplateResponse("add.html", context) @app.get("/feed", response_class=HTMLResponse) -async def get_feed(feed_url: str, request: Request): +async def get_feed(feed_url, request: Request): """ Get a feed by URL. @@ -402,7 +404,7 @@ async def get_feed(feed_url: str, request: Request): feed_counts: FeedCounts = reader.get_feed_counts(feed=url) context = {"request": request, "feed": feed, "entries": entries, "feed_counts": feed_counts} - return templates.TemplateResponse("feed.html", context) # type: ignore + return templates.TemplateResponse("feed.html", context) @app.get("/webhooks", response_class=HTMLResponse) @@ -416,7 +418,7 @@ async def get_webhooks(request: Request): Returns: HTMLResponse: The HTML response. """ - return templates.TemplateResponse("webhooks.html", {"request": request}) # type: ignore + return templates.TemplateResponse("webhooks.html", {"request": request}) @app.get("/", response_class=HTMLResponse) @@ -430,11 +432,11 @@ def index(request: Request): Returns: HTMLResponse: The HTML response. """ - context = make_context_index(request) # type: ignore - return templates.TemplateResponse("index.html", context) # type: ignore + context = make_context_index(request) + return templates.TemplateResponse("index.html", context) -def make_context_index(request: Request) -> dict[str, Any]: +def make_context_index(request: Request): """ Create the needed context for the index page. @@ -452,12 +454,12 @@ def make_context_index(request: Request) -> dict[str, Any]: except TagNotFoundError: hooks = [] - feed_list: list[dict[str, Any]] = [] - broken_feeds: list[Feed] = [] + feed_list = [] + broken_feeds = [] feeds: Iterable[Feed] = reader.get_feeds() for feed in feeds: try: - hook: str = reader.get_tag(feed.url, "webhook") # type: ignore + hook = reader.get_tag(feed.url, "webhook") feed_list.append({"feed": feed, "webhook": hook}) except TagNotFoundError: broken_feeds.append(feed) @@ -468,7 +470,7 @@ def make_context_index(request: Request) -> dict[str, Any]: feed_count: FeedCounts = reader.get_feed_counts() entry_count: EntryCounts = reader.get_entry_counts() - context: dict[str, Any] = { + return { "request": request, "feeds": feed_list, "feed_count": feed_count, @@ -476,11 +478,10 @@ def make_context_index(request: Request) -> dict[str, Any]: "webhooks": hooks, "broken_feeds": broken_feeds, } - return context @app.post("/remove", response_class=HTMLResponse) -async def remove_feed(feed_url: str = Form()): +async def remove_feed(feed_url=Form()): """ Get a feed by URL. @@ -497,7 +498,7 @@ async def remove_feed(feed_url: str = Form()): @app.get("/search", response_class=HTMLResponse) -async def search(request: Request, query: str): +async def search(request: Request, query): """ Get entries matching a full-text search query. @@ -514,16 +515,16 @@ async def search(request: Request, query: str): search_html: str = create_html_for_search_results(search_results) - context: dict[str, Request | str | EntrySearchCounts] = { + context = { "request": request, "search_html": search_html, "query": query, "search_amount": search_amount, } - return templates.TemplateResponse("search.html", context) # type: ignore + return templates.TemplateResponse("search.html", context) -@app.on_event("startup") # type: ignore +@app.on_event("startup") def startup() -> None: """This is called when the server starts. @@ -531,10 +532,10 @@ def startup() -> None: scheduler: BackgroundScheduler = BackgroundScheduler() # Update all feeds every 15 minutes. - scheduler.add_job(send_to_discord, "interval", minutes=15, next_run_time=datetime.now()) # type: ignore + scheduler.add_job(send_to_discord, "interval", minutes=15, next_run_time=datetime.now()) - scheduler.start() # type: ignore + scheduler.start() if __name__ == "__main__": - uvicorn.run("main:app", log_level="debug", reload=True) # type: ignore + uvicorn.run("main:app", log_level="debug", reload=True) diff --git a/discord_rss_bot/settings.py b/discord_rss_bot/settings.py index 6fa1cff..cdc415d 100644 --- a/discord_rss_bot/settings.py +++ b/discord_rss_bot/settings.py @@ -10,7 +10,7 @@ data_dir: str = user_data_dir(appname="discord_rss_bot", appauthor="TheLovinator os.makedirs(data_dir, exist_ok=True) -def get_webhook_for_entry(custom_reader: Reader, entry: Entry) -> str | None: +def get_webhook_for_entry(custom_reader: Reader, entry: Entry) -> str: """ Get the webhook from the database. @@ -26,12 +26,12 @@ def get_webhook_for_entry(custom_reader: Reader, entry: Entry) -> str | None: # Get the webhook from the feed. # Is None if not found or error. - webhook_url: str | None + webhook_url: str try: webhook_url = str(reader.get_tag(entry.feed_url, "webhook")) except TagNotFoundError: print(f"Webhook not found for feed {entry.feed_url}") - webhook_url = None + webhook_url = "" return webhook_url diff --git a/discord_rss_bot/templates/feed.html b/discord_rss_bot/templates/feed.html index 48efc1f..4312b59 100644 --- a/discord_rss_bot/templates/feed.html +++ b/discord_rss_bot/templates/feed.html @@ -47,13 +47,13 @@ {% if entry.published %} @ {{ entry.published.strftime('%Y-%m-%d, %T') }} {% endif %} - + {# TODO: Only show one if both are the same #} {% if entry.summary %}
Summary
- {{ entry.summary | safe }} + {{ entry.summary | discord_markdown }}
{% endif %} @@ -61,7 +61,7 @@
Content
- {{ entry.content[0].value | safe }} + {{ entry.content[0].value | discord_markdown }}
{% endif %} diff --git a/poetry.lock b/poetry.lock index f865d7a..2338bdc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -380,6 +380,22 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "markdownify" +version = "0.11.6" +description = "Convert HTML to markdown." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "markdownify-0.11.6-py3-none-any.whl", hash = "sha256:ba35fe289d5e9073bcd7d2cad629278fe25f1a93741fcdc0bfb4f009076d8324"}, + {file = "markdownify-0.11.6.tar.gz", hash = "sha256:009b240e0c9f4c8eaf1d085625dcd4011e12f0f8cec55dedf9ea6f7655e49bfe"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.9,<5" +six = ">=1.15,<2" + [[package]] name = "markupsafe" version = "2.1.1" @@ -1075,4 +1091,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "37fcda2250c0a72f4d127ce22a0d243609277379431e6f72d828beaf90b8f0f1" +content-hash = "d49e7d798db6866a97b7a9ced8b13efbe0c9465cac2cf4dd6dacd3d29da70f73" diff --git a/pyproject.toml b/pyproject.toml index 6be5607..5fc3c10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ apscheduler = "^3.9.1.post1" python-multipart = "^0.0.5" python-dotenv = "^0.21.0" tomlkit = "^0.11.6" +markdownify = "^0.11.6" [tool.poetry.group.dev.dependencies] pytest = "^7.2.0"