Convert HTML in content/summary to Markdown

This commit is contained in:
2023-01-14 12:54:03 +01:00
parent 0700a169fa
commit 873f5b3f7a
7 changed files with 101 additions and 75 deletions

View File

@ -1,3 +1,4 @@
from markdownify import markdownify
from reader import Entry, Feed, Reader, TagNotFoundError from reader import Entry, Feed, Reader, TagNotFoundError
from discord_rss_bot.settings import get_reader from discord_rss_bot.settings import get_reader
@ -41,6 +42,13 @@ def replace_tags(feed: Feed, entry: Entry) -> str:
custom_reader: Reader = get_reader() custom_reader: Reader = get_reader()
custom_message: str = get_custom_message(feed=feed, custom_reader=custom_reader) custom_message: str = get_custom_message(feed=feed, custom_reader=custom_reader)
summary = ""
content = ""
if entry.summary:
summary: str = markdownify(entry.summary)
if entry.content:
content: str = markdownify(entry.content[0]["value"])
list_of_replacements = [ list_of_replacements = [
{"{{feed_author}}": feed.author}, {"{{feed_author}}": feed.author},
{"{{feed_added}}": feed.added}, {"{{feed_added}}": feed.added},
@ -56,21 +64,21 @@ def replace_tags(feed: Feed, entry: Entry) -> str:
{"{{feed_version}}": feed.version}, {"{{feed_version}}": feed.version},
{"{{entry_added}}": entry.added}, {"{{entry_added}}": entry.added},
{"{{entry_author}}": entry.author}, {"{{entry_author}}": entry.author},
{"{{entry_content}}": entry.content}, {"{{entry_content}}": content},
{"{{entry_id}}": entry.id}, {"{{entry_id}}": entry.id},
{"{{entry_important}}": str(entry.important)}, {"{{entry_important}}": str(entry.important)},
{"{{entry_link}}": entry.link}, {"{{entry_link}}": entry.link},
{"{{entry_published}}": entry.published}, {"{{entry_published}}": entry.published},
{"{{entry_read}}": str(entry.read)}, {"{{entry_read}}": str(entry.read)},
{"{{entry_read_modified}}": entry.read_modified}, {"{{entry_read_modified}}": entry.read_modified},
{"{{entry_summary}}": entry.summary}, {"{{entry_summary}}": summary},
{"{{entry_title}}": entry.title}, {"{{entry_title}}": entry.title},
{"{{entry_updated}}": entry.updated}, {"{{entry_updated}}": entry.updated},
] ]
for replacement in list_of_replacements: for replacement in list_of_replacements:
for template, replace_with in replacement.items(): for template, replace_with in replacement.items():
custom_message: str = try_to_replace(custom_message, template, replace_with) custom_message = try_to_replace(custom_message, template, replace_with)
print(f"custom_message: {custom_message}") print(f"custom_message: {custom_message}")
return custom_message return custom_message
@ -87,9 +95,9 @@ def get_custom_message(custom_reader: Reader, feed: Feed) -> str:
Returns the contents from the custom_message tag. Returns the contents from the custom_message tag.
""" """
try: try:
custom_message: str = custom_reader.get_tag(feed, "custom_message") # type: ignore custom_message: str = str(custom_reader.get_tag(feed, "custom_message"))
except TagNotFoundError: except TagNotFoundError:
custom_message: str = "" custom_message = ""
except ValueError: except ValueError:
custom_message: str = "" custom_message = ""
return custom_message return custom_message

View File

@ -34,17 +34,17 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non
if feed is None: if feed is None:
entries: Iterable[Entry] = reader.get_entries(read=False) entries: Iterable[Entry] = reader.get_entries(read=False)
else: else:
entries: Iterable[Entry] = reader.get_entries(feed=feed, read=False) entries = reader.get_entries(feed=feed, read=False)
for entry in entries: for entry in entries:
# Mark the entry as read, so we don't send it again. # Mark the entry as read, so we don't send it again.
reader.set_entry_read(entry, True) reader.set_entry_read(entry, True)
webhook_url: str | None = settings.get_webhook_for_entry(reader, entry) webhook_url: str = settings.get_webhook_for_entry(reader, entry)
webhook_message: str = f"{entry.title}\n{entry.link}" webhook_message: str = f"{entry.title}\n{entry.link}"
if webhook_url is None: if not webhook_url:
print(f"Error: No webhook found for feed: {entry.feed.title}") print(f"Error: No webhook found for feed: {entry.feed.title}")
continue continue

View File

@ -1,6 +1,6 @@
import urllib.parse import urllib.parse
from datetime import datetime from datetime import datetime
from typing import Any, Iterable from typing import Iterable
import uvicorn import uvicorn
from apscheduler.schedulers.background import BackgroundScheduler from apscheduler.schedulers.background import BackgroundScheduler
@ -8,6 +8,7 @@ from fastapi import FastAPI, Form, Request
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from markdownify import markdownify
from reader import Entry, EntryCounts, EntrySearchCounts, EntrySearchResult, Feed, FeedCounts, Reader, TagNotFoundError from reader import Entry, EntryCounts, EntrySearchCounts, EntrySearchResult, Feed, FeedCounts, Reader, TagNotFoundError
from starlette.responses import RedirectResponse from starlette.responses import RedirectResponse
@ -27,13 +28,14 @@ reader: Reader = get_reader()
# Add the filters to the Jinja2 environment so they can be used in html templates. # Add the filters to the Jinja2 environment so they can be used in html templates.
templates.env.filters["encode_url"] = encode_url # type: ignore templates.env.filters["encode_url"] = encode_url
templates.env.filters["entry_is_whitelisted"] = entry_is_whitelisted # type: ignore templates.env.filters["entry_is_whitelisted"] = entry_is_whitelisted
templates.env.filters["entry_is_blacklisted"] = entry_is_blacklisted # type: ignore templates.env.filters["entry_is_blacklisted"] = entry_is_blacklisted
templates.env.filters["discord_markdown"] = markdownify
@app.post("/add_webhook") @app.post("/add_webhook")
async def add_webhook(webhook_name: str = Form(), webhook_url: str = Form()): async def add_webhook(webhook_name=Form(), webhook_url=Form()):
""" """
Add a feed to the database. Add a feed to the database.
@ -69,7 +71,7 @@ async def add_webhook(webhook_name: str = Form(), webhook_url: str = Form()):
@app.post("/delete_webhook") @app.post("/delete_webhook")
async def delete_webhook(webhook_url: str = Form()): async def delete_webhook(webhook_url=Form()):
""" """
Delete a webhook from the database. Delete a webhook from the database.
@ -103,7 +105,7 @@ async def delete_webhook(webhook_url: str = Form()):
@app.post("/add") @app.post("/add")
async def create_feed(feed_url: str = Form(), webhook_dropdown: str = Form()): async def create_feed(feed_url=Form(), webhook_dropdown=Form()):
""" """
Add a feed to the database. Add a feed to the database.
@ -124,19 +126,19 @@ async def create_feed(feed_url: str = Form(), webhook_dropdown: str = Form()):
# Mark every entry as read, so we don't send all the old entries to Discord. # Mark every entry as read, so we don't send all the old entries to Discord.
entries: Iterable[Entry] = reader.get_entries(feed=clean_feed_url, read=False) entries: Iterable[Entry] = reader.get_entries(feed=clean_feed_url, read=False)
for entry in entries: for entry in entries:
reader.set_entry_read(entry, True) # type: ignore reader.set_entry_read(entry, True)
try: try:
hooks = reader.get_tag((), "webhooks") hooks = reader.get_tag((), "webhooks")
except TagNotFoundError: except TagNotFoundError:
hooks = [] hooks = []
webhook_url = "" webhook_url: str = ""
if hooks: if hooks:
# Get the webhook URL from the dropdown. # Get the webhook URL from the dropdown.
for hook in hooks: for hook in hooks:
if hook["name"] == webhook_dropdown: # type: ignore if hook["name"] == webhook_dropdown: # type: ignore
webhook_url: str = hook["url"] # type: ignore webhook_url = hook["url"] # type: ignore
break break
if not webhook_url: if not webhook_url:
@ -152,7 +154,7 @@ async def create_feed(feed_url: str = Form(), webhook_dropdown: str = Form()):
@app.post("/pause") @app.post("/pause")
async def pause_feed(feed_url: str = Form()): async def pause_feed(feed_url=Form()):
"""Pause a feed. """Pause a feed.
Args: Args:
@ -171,7 +173,7 @@ async def pause_feed(feed_url: str = Form()):
@app.post("/unpause") @app.post("/unpause")
async def unpause_feed(feed_url: str = Form()): async def unpause_feed(feed_url=Form()):
"""Unpause a feed. """Unpause a feed.
Args: Args:
@ -191,10 +193,10 @@ async def unpause_feed(feed_url: str = Form()):
@app.post("/whitelist") @app.post("/whitelist")
async def set_whitelist( async def set_whitelist(
whitelist_title: str = Form(None), whitelist_title=Form(None),
whitelist_summary: str = Form(None), whitelist_summary=Form(None),
whitelist_content: str = Form(None), whitelist_content=Form(None),
feed_url: str = Form(), feed_url=Form(),
): ):
"""Set the whitelist. If it is set, only entries containing words in the whitelist will be sent. """Set the whitelist. If it is set, only entries containing words in the whitelist will be sent.
@ -208,20 +210,20 @@ async def set_whitelist(
Redirect back to the feed page. Redirect back to the feed page.
""" """
if whitelist_title: if whitelist_title:
reader.set_tag(feed_url, "whitelist_title", whitelist_title) # type: ignore reader.set_tag(feed_url, "whitelist_title", whitelist_title)
if whitelist_summary: if whitelist_summary:
reader.set_tag(feed_url, "whitelist_summary", whitelist_summary) # type: ignore reader.set_tag(feed_url, "whitelist_summary", whitelist_summary)
if whitelist_content: if whitelist_content:
reader.set_tag(feed_url, "whitelist_content", whitelist_content) # type: ignore reader.set_tag(feed_url, "whitelist_content", whitelist_content)
# Clean URL is used to redirect to the feed page. # Clean URL is used to redirect to the feed page.
clean_url: str = urllib.parse.quote(feed_url) clean_url = urllib.parse.quote(feed_url)
return RedirectResponse(url=f"/feed/?feed_url={clean_url}", status_code=303) return RedirectResponse(url=f"/feed/?feed_url={clean_url}", status_code=303)
@app.get("/whitelist", response_class=HTMLResponse) @app.get("/whitelist", response_class=HTMLResponse)
async def get_whitelist(feed_url: str, request: Request): async def get_whitelist(feed_url, request: Request):
"""Get the whitelist. """Get the whitelist.
Args: Args:
@ -248,15 +250,15 @@ async def get_whitelist(feed_url: str, request: Request):
"whitelist_summary": whitelist_summary, "whitelist_summary": whitelist_summary,
"whitelist_content": whitelist_content, "whitelist_content": whitelist_content,
} }
return templates.TemplateResponse("whitelist.html", context) # type: ignore return templates.TemplateResponse("whitelist.html", context)
@app.post("/blacklist") @app.post("/blacklist")
async def set_blacklist( async def set_blacklist(
blacklist_title: str = Form(None), blacklist_title=Form(None),
blacklist_summary: str = Form(None), blacklist_summary=Form(None),
blacklist_content: str = Form(None), blacklist_content=Form(None),
feed_url: str = Form(), feed_url=Form(),
): ):
"""Set the blacklist, if this is set we will check if words are in the title, summary or content """Set the blacklist, if this is set we will check if words are in the title, summary or content
and then don't send that entry. and then don't send that entry.
@ -273,20 +275,20 @@ async def set_blacklist(
# Add the blacklist to the feed. # Add the blacklist to the feed.
if blacklist_title: if blacklist_title:
reader.set_tag(feed_url, "blacklist_title", blacklist_title) # type: ignore reader.set_tag(feed_url, "blacklist_title", blacklist_title)
if blacklist_summary: if blacklist_summary:
reader.set_tag(feed_url, "blacklist_summary", blacklist_summary) # type: ignore reader.set_tag(feed_url, "blacklist_summary", blacklist_summary)
if blacklist_content: if blacklist_content:
reader.set_tag(feed_url, "blacklist_content", blacklist_content) # type: ignore reader.set_tag(feed_url, "blacklist_content", blacklist_content)
# Clean URL is used to redirect to the feed page. # Clean URL is used to redirect to the feed page.
clean_url: str = urllib.parse.quote(feed_url) clean_url = urllib.parse.quote(feed_url)
return RedirectResponse(url=f"/feed/?feed_url={clean_url}", status_code=303) return RedirectResponse(url=f"/feed/?feed_url={clean_url}", status_code=303)
@app.get("/blacklist", response_class=HTMLResponse) @app.get("/blacklist", response_class=HTMLResponse)
async def get_blacklist(feed_url: str, request: Request): async def get_blacklist(feed_url, request: Request):
# Make feed_url a valid URL. # Make feed_url a valid URL.
url: str = urllib.parse.unquote(feed_url) url: str = urllib.parse.unquote(feed_url)
@ -304,11 +306,11 @@ async def get_blacklist(feed_url: str, request: Request):
"blacklist_summary": blacklist_summary, "blacklist_summary": blacklist_summary,
"blacklist_content": blacklist_content, "blacklist_content": blacklist_content,
} }
return templates.TemplateResponse("blacklist.html", context) # type: ignore return templates.TemplateResponse("blacklist.html", context)
@app.post("/custom") @app.post("/custom")
async def set_custom(custom_message: str = Form(""), feed_url: str = Form()): async def set_custom(custom_message=Form(""), feed_url=Form()):
""" """
Set the custom message, this is used when sending the message. Set the custom message, this is used when sending the message.
@ -335,7 +337,7 @@ async def set_custom(custom_message: str = Form(""), feed_url: str = Form()):
@app.get("/custom", response_class=HTMLResponse) @app.get("/custom", response_class=HTMLResponse)
async def get_custom(feed_url: str, request: Request): async def get_custom(feed_url, request: Request):
"""Get the custom message. This is used when sending the message to Discord. """Get the custom message. This is used when sending the message to Discord.
Args: Args:
@ -355,12 +357,12 @@ async def get_custom(feed_url: str, request: Request):
custom_message: str = get_custom_message(reader, feed) custom_message: str = get_custom_message(reader, feed)
# Get the first entry, this is used to show the user what the custom message will look like. # Get the first entry, this is used to show the user what the custom message will look like.
first_entry: Entry = reader.get_entries(feed=feed, limit=1) entries: Iterable[Entry] = reader.get_entries(feed=feed, limit=1)
for entry in first_entry: for entry in entries:
first_entry = entry first_entry: Entry = entry
context = {"request": request, "feed": feed, "custom_message": custom_message, "entry": first_entry} context = {"request": request, "feed": feed, "custom_message": custom_message, "entry": first_entry}
return templates.TemplateResponse("custom.html", context) # type: ignore return templates.TemplateResponse("custom.html", context)
@app.get("/add", response_class=HTMLResponse) @app.get("/add", response_class=HTMLResponse)
@ -374,12 +376,12 @@ def get_add(request: Request):
Returns: Returns:
HTMLResponse: The HTML response. HTMLResponse: The HTML response.
""" """
context = make_context_index(request) # type: ignore context = make_context_index(request)
return templates.TemplateResponse("add.html", context) # type: ignore return templates.TemplateResponse("add.html", context)
@app.get("/feed", response_class=HTMLResponse) @app.get("/feed", response_class=HTMLResponse)
async def get_feed(feed_url: str, request: Request): async def get_feed(feed_url, request: Request):
""" """
Get a feed by URL. Get a feed by URL.
@ -402,7 +404,7 @@ async def get_feed(feed_url: str, request: Request):
feed_counts: FeedCounts = reader.get_feed_counts(feed=url) feed_counts: FeedCounts = reader.get_feed_counts(feed=url)
context = {"request": request, "feed": feed, "entries": entries, "feed_counts": feed_counts} context = {"request": request, "feed": feed, "entries": entries, "feed_counts": feed_counts}
return templates.TemplateResponse("feed.html", context) # type: ignore return templates.TemplateResponse("feed.html", context)
@app.get("/webhooks", response_class=HTMLResponse) @app.get("/webhooks", response_class=HTMLResponse)
@ -416,7 +418,7 @@ async def get_webhooks(request: Request):
Returns: Returns:
HTMLResponse: The HTML response. HTMLResponse: The HTML response.
""" """
return templates.TemplateResponse("webhooks.html", {"request": request}) # type: ignore return templates.TemplateResponse("webhooks.html", {"request": request})
@app.get("/", response_class=HTMLResponse) @app.get("/", response_class=HTMLResponse)
@ -430,11 +432,11 @@ def index(request: Request):
Returns: Returns:
HTMLResponse: The HTML response. HTMLResponse: The HTML response.
""" """
context = make_context_index(request) # type: ignore context = make_context_index(request)
return templates.TemplateResponse("index.html", context) # type: ignore return templates.TemplateResponse("index.html", context)
def make_context_index(request: Request) -> dict[str, Any]: def make_context_index(request: Request):
""" """
Create the needed context for the index page. Create the needed context for the index page.
@ -452,12 +454,12 @@ def make_context_index(request: Request) -> dict[str, Any]:
except TagNotFoundError: except TagNotFoundError:
hooks = [] hooks = []
feed_list: list[dict[str, Any]] = [] feed_list = []
broken_feeds: list[Feed] = [] broken_feeds = []
feeds: Iterable[Feed] = reader.get_feeds() feeds: Iterable[Feed] = reader.get_feeds()
for feed in feeds: for feed in feeds:
try: try:
hook: str = reader.get_tag(feed.url, "webhook") # type: ignore hook = reader.get_tag(feed.url, "webhook")
feed_list.append({"feed": feed, "webhook": hook}) feed_list.append({"feed": feed, "webhook": hook})
except TagNotFoundError: except TagNotFoundError:
broken_feeds.append(feed) broken_feeds.append(feed)
@ -468,7 +470,7 @@ def make_context_index(request: Request) -> dict[str, Any]:
feed_count: FeedCounts = reader.get_feed_counts() feed_count: FeedCounts = reader.get_feed_counts()
entry_count: EntryCounts = reader.get_entry_counts() entry_count: EntryCounts = reader.get_entry_counts()
context: dict[str, Any] = { return {
"request": request, "request": request,
"feeds": feed_list, "feeds": feed_list,
"feed_count": feed_count, "feed_count": feed_count,
@ -476,11 +478,10 @@ def make_context_index(request: Request) -> dict[str, Any]:
"webhooks": hooks, "webhooks": hooks,
"broken_feeds": broken_feeds, "broken_feeds": broken_feeds,
} }
return context
@app.post("/remove", response_class=HTMLResponse) @app.post("/remove", response_class=HTMLResponse)
async def remove_feed(feed_url: str = Form()): async def remove_feed(feed_url=Form()):
""" """
Get a feed by URL. Get a feed by URL.
@ -497,7 +498,7 @@ async def remove_feed(feed_url: str = Form()):
@app.get("/search", response_class=HTMLResponse) @app.get("/search", response_class=HTMLResponse)
async def search(request: Request, query: str): async def search(request: Request, query):
""" """
Get entries matching a full-text search query. Get entries matching a full-text search query.
@ -514,16 +515,16 @@ async def search(request: Request, query: str):
search_html: str = create_html_for_search_results(search_results) search_html: str = create_html_for_search_results(search_results)
context: dict[str, Request | str | EntrySearchCounts] = { context = {
"request": request, "request": request,
"search_html": search_html, "search_html": search_html,
"query": query, "query": query,
"search_amount": search_amount, "search_amount": search_amount,
} }
return templates.TemplateResponse("search.html", context) # type: ignore return templates.TemplateResponse("search.html", context)
@app.on_event("startup") # type: ignore @app.on_event("startup")
def startup() -> None: def startup() -> None:
"""This is called when the server starts. """This is called when the server starts.
@ -531,10 +532,10 @@ def startup() -> None:
scheduler: BackgroundScheduler = BackgroundScheduler() scheduler: BackgroundScheduler = BackgroundScheduler()
# Update all feeds every 15 minutes. # Update all feeds every 15 minutes.
scheduler.add_job(send_to_discord, "interval", minutes=15, next_run_time=datetime.now()) # type: ignore scheduler.add_job(send_to_discord, "interval", minutes=15, next_run_time=datetime.now())
scheduler.start() # type: ignore scheduler.start()
if __name__ == "__main__": if __name__ == "__main__":
uvicorn.run("main:app", log_level="debug", reload=True) # type: ignore uvicorn.run("main:app", log_level="debug", reload=True)

View File

@ -10,7 +10,7 @@ data_dir: str = user_data_dir(appname="discord_rss_bot", appauthor="TheLovinator
os.makedirs(data_dir, exist_ok=True) os.makedirs(data_dir, exist_ok=True)
def get_webhook_for_entry(custom_reader: Reader, entry: Entry) -> str | None: def get_webhook_for_entry(custom_reader: Reader, entry: Entry) -> str:
""" """
Get the webhook from the database. Get the webhook from the database.
@ -26,12 +26,12 @@ def get_webhook_for_entry(custom_reader: Reader, entry: Entry) -> str | None:
# Get the webhook from the feed. # Get the webhook from the feed.
# Is None if not found or error. # Is None if not found or error.
webhook_url: str | None webhook_url: str
try: try:
webhook_url = str(reader.get_tag(entry.feed_url, "webhook")) webhook_url = str(reader.get_tag(entry.feed_url, "webhook"))
except TagNotFoundError: except TagNotFoundError:
print(f"Webhook not found for feed {entry.feed_url}") print(f"Webhook not found for feed {entry.feed_url}")
webhook_url = None webhook_url = ""
return webhook_url return webhook_url

View File

@ -53,7 +53,7 @@
<details> <details>
<summary>Summary</summary> <summary>Summary</summary>
<div class="text-muted text-wrap summary"> <div class="text-muted text-wrap summary">
{{ entry.summary | safe }} {{ entry.summary | discord_markdown }}
</div> </div>
</details> </details>
{% endif %} {% endif %}
@ -61,7 +61,7 @@
<details> <details>
<summary>Content</summary> <summary>Content</summary>
<div class="text-muted summary"> <div class="text-muted summary">
{{ entry.content[0].value | safe }} {{ entry.content[0].value | discord_markdown }}
</div> </div>
</details> </details>
{% endif %} {% endif %}

18
poetry.lock generated
View File

@ -380,6 +380,22 @@ MarkupSafe = ">=2.0"
[package.extras] [package.extras]
i18n = ["Babel (>=2.7)"] i18n = ["Babel (>=2.7)"]
[[package]]
name = "markdownify"
version = "0.11.6"
description = "Convert HTML to markdown."
category = "main"
optional = false
python-versions = "*"
files = [
{file = "markdownify-0.11.6-py3-none-any.whl", hash = "sha256:ba35fe289d5e9073bcd7d2cad629278fe25f1a93741fcdc0bfb4f009076d8324"},
{file = "markdownify-0.11.6.tar.gz", hash = "sha256:009b240e0c9f4c8eaf1d085625dcd4011e12f0f8cec55dedf9ea6f7655e49bfe"},
]
[package.dependencies]
beautifulsoup4 = ">=4.9,<5"
six = ">=1.15,<2"
[[package]] [[package]]
name = "markupsafe" name = "markupsafe"
version = "2.1.1" version = "2.1.1"
@ -1075,4 +1091,4 @@ files = [
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "37fcda2250c0a72f4d127ce22a0d243609277379431e6f72d828beaf90b8f0f1" content-hash = "d49e7d798db6866a97b7a9ced8b13efbe0c9465cac2cf4dd6dacd3d29da70f73"

View File

@ -16,6 +16,7 @@ apscheduler = "^3.9.1.post1"
python-multipart = "^0.0.5" python-multipart = "^0.0.5"
python-dotenv = "^0.21.0" python-dotenv = "^0.21.0"
tomlkit = "^0.11.6" tomlkit = "^0.11.6"
markdownify = "^0.11.6"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
pytest = "^7.2.0" pytest = "^7.2.0"