Add domain-wide blacklist and whitelist functionality

This commit is contained in:
Joakim Hellsén 2026-04-12 23:51:05 +02:00
commit bdbd46ebd4
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
14 changed files with 930 additions and 305 deletions

View file

@ -54,6 +54,7 @@ from discord_rss_bot.feeds import get_feed_delivery_mode
from discord_rss_bot.feeds import get_screenshot_layout
from discord_rss_bot.feeds import send_entry_to_discord
from discord_rss_bot.feeds import send_to_discord
from discord_rss_bot.filter.utils import get_domain_key
from discord_rss_bot.git_backup import commit_state_change
from discord_rss_bot.git_backup import get_backup_path
from discord_rss_bot.is_url_valid import is_url_valid
@ -399,6 +400,7 @@ async def post_unpause_feed(
@app.post("/whitelist")
async def post_set_whitelist(
reader: Annotated[Reader, Depends(get_reader_dependency)],
*,
whitelist_title: Annotated[str, Form()] = "",
whitelist_summary: Annotated[str, Form()] = "",
whitelist_content: Annotated[str, Form()] = "",
@ -407,6 +409,7 @@ async def post_set_whitelist(
regex_whitelist_summary: Annotated[str, Form()] = "",
regex_whitelist_content: Annotated[str, Form()] = "",
regex_whitelist_author: Annotated[str, Form()] = "",
apply_to_domain: Annotated[bool, Form()] = False,
feed_url: Annotated[str, Form()] = "",
) -> RedirectResponse:
"""Set the whitelist for a feed. If a whitelist is set, only entries containing whitelisted words will be sent.
@ -420,6 +423,7 @@ async def post_set_whitelist(
regex_whitelist_summary: Whitelisted regex for when checking the summary.
regex_whitelist_content: Whitelisted regex for when checking the content.
regex_whitelist_author: Whitelisted regex for when checking the author.
apply_to_domain: Also store these values as domain-wide whitelist rules.
feed_url: The feed we should set the whitelist for.
reader: The Reader instance.
@ -427,16 +431,43 @@ async def post_set_whitelist(
RedirectResponse: Redirect to the feed page.
"""
clean_feed_url: str = feed_url.strip() if feed_url else ""
reader.set_tag(clean_feed_url, "whitelist_title", whitelist_title) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "whitelist_summary", whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "whitelist_content", whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "whitelist_author", whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_whitelist_title", regex_whitelist_title) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_whitelist_summary", regex_whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_whitelist_content", regex_whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_whitelist_author", regex_whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
whitelist_values: dict[str, str] = {
"whitelist_title": whitelist_title.strip(),
"whitelist_summary": whitelist_summary.strip(),
"whitelist_content": whitelist_content.strip(),
"whitelist_author": whitelist_author.strip(),
"regex_whitelist_title": regex_whitelist_title.strip(),
"regex_whitelist_summary": regex_whitelist_summary.strip(),
"regex_whitelist_content": regex_whitelist_content.strip(),
"regex_whitelist_author": regex_whitelist_author.strip(),
}
commit_state_change(reader, f"Update whitelist for {clean_feed_url}")
for tag, value in whitelist_values.items():
reader.set_tag(clean_feed_url, tag, value) # pyright: ignore[reportArgumentType][call-overload]
message: str = f"Update whitelist for {clean_feed_url}"
if apply_to_domain:
domain_key: str = get_domain_key(clean_feed_url)
if domain_key:
domain_whitelists_raw = reader.get_tag((), "domain_whitelist", {})
domain_whitelists: dict[str, dict[str, str]] = {}
if isinstance(domain_whitelists_raw, dict):
for existing_domain, existing_values in domain_whitelists_raw.items():
if isinstance(existing_domain, str) and isinstance(existing_values, dict):
domain_whitelists[existing_domain] = {
str(key): str(value) for key, value in existing_values.items() if isinstance(key, str)
}
domain_values: dict[str, str] = {k: v for k, v in whitelist_values.items() if v}
if domain_values:
domain_whitelists[domain_key] = domain_values
else:
domain_whitelists.pop(domain_key, None)
reader.set_tag((), "domain_whitelist", domain_whitelists) # pyright: ignore[reportArgumentType]
message = f"Update whitelist for {clean_feed_url} and domain {domain_key}"
commit_state_change(reader, message)
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
@ -468,6 +499,11 @@ async def get_whitelist(
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", ""))
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", ""))
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", ""))
domain_key: str = get_domain_key(feed.url)
domain_whitelist_raw = reader.get_tag((), "domain_whitelist", {})
domain_whitelist_enabled: bool = bool(
isinstance(domain_whitelist_raw, dict) and domain_key and domain_key in domain_whitelist_raw,
)
context = {
"request": request,
@ -480,6 +516,9 @@ async def get_whitelist(
"regex_whitelist_summary": regex_whitelist_summary,
"regex_whitelist_content": regex_whitelist_content,
"regex_whitelist_author": regex_whitelist_author,
"domain_key": domain_key,
"domain_name": extract_domain(feed.url),
"domain_whitelist_enabled": domain_whitelist_enabled,
}
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
@ -487,6 +526,7 @@ async def get_whitelist(
@app.post("/blacklist")
async def post_set_blacklist(
reader: Annotated[Reader, Depends(get_reader_dependency)],
*,
blacklist_title: Annotated[str, Form()] = "",
blacklist_summary: Annotated[str, Form()] = "",
blacklist_content: Annotated[str, Form()] = "",
@ -495,6 +535,7 @@ async def post_set_blacklist(
regex_blacklist_summary: Annotated[str, Form()] = "",
regex_blacklist_content: Annotated[str, Form()] = "",
regex_blacklist_author: Annotated[str, Form()] = "",
apply_to_domain: Annotated[bool, Form()] = False,
feed_url: Annotated[str, Form()] = "",
) -> RedirectResponse:
"""Set the blacklist.
@ -511,6 +552,7 @@ async def post_set_blacklist(
regex_blacklist_summary: Blacklisted regex for when checking the summary.
regex_blacklist_content: Blacklisted regex for when checking the content.
regex_blacklist_author: Blacklisted regex for when checking the author.
apply_to_domain: Also store these values as domain-wide blacklist rules.
feed_url: What feed we should set the blacklist for.
reader: The Reader instance.
@ -518,15 +560,43 @@ async def post_set_blacklist(
RedirectResponse: Redirect to the feed page.
"""
clean_feed_url: str = feed_url.strip() if feed_url else ""
reader.set_tag(clean_feed_url, "blacklist_title", blacklist_title) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "blacklist_summary", blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "blacklist_content", blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "blacklist_author", blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_blacklist_title", regex_blacklist_title) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_blacklist_summary", regex_blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_blacklist_content", regex_blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_blacklist_author", regex_blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
commit_state_change(reader, f"Update blacklist for {clean_feed_url}")
blacklist_values: dict[str, str] = {
"blacklist_title": blacklist_title.strip(),
"blacklist_summary": blacklist_summary.strip(),
"blacklist_content": blacklist_content.strip(),
"blacklist_author": blacklist_author.strip(),
"regex_blacklist_title": regex_blacklist_title.strip(),
"regex_blacklist_summary": regex_blacklist_summary.strip(),
"regex_blacklist_content": regex_blacklist_content.strip(),
"regex_blacklist_author": regex_blacklist_author.strip(),
}
for tag, value in blacklist_values.items():
reader.set_tag(clean_feed_url, tag, value) # pyright: ignore[reportArgumentType][call-overload]
message: str = f"Update blacklist for {clean_feed_url}"
if apply_to_domain:
domain_key: str = get_domain_key(clean_feed_url)
if domain_key:
domain_blacklists_raw = reader.get_tag((), "domain_blacklist", {})
domain_blacklists: dict[str, dict[str, str]] = {}
if isinstance(domain_blacklists_raw, dict):
for existing_domain, existing_values in domain_blacklists_raw.items():
if isinstance(existing_domain, str) and isinstance(existing_values, dict):
domain_blacklists[existing_domain] = {
str(key): str(value) for key, value in existing_values.items() if isinstance(key, str)
}
domain_values: dict[str, str] = {k: v for k, v in blacklist_values.items() if v}
if domain_values:
domain_blacklists[domain_key] = domain_values
else:
domain_blacklists.pop(domain_key, None)
reader.set_tag((), "domain_blacklist", domain_blacklists) # pyright: ignore[reportArgumentType]
message = f"Update blacklist for {clean_feed_url} and domain {domain_key}"
commit_state_change(reader, message)
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
@ -556,6 +626,11 @@ async def get_blacklist(
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", ""))
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", ""))
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", ""))
domain_key: str = get_domain_key(feed.url)
domain_blacklist_raw = reader.get_tag((), "domain_blacklist", {})
domain_blacklist_enabled: bool = bool(
isinstance(domain_blacklist_raw, dict) and domain_key and domain_key in domain_blacklist_raw,
)
context = {
"request": request,
@ -568,6 +643,9 @@ async def get_blacklist(
"regex_blacklist_summary": regex_blacklist_summary,
"regex_blacklist_content": regex_blacklist_content,
"regex_blacklist_author": regex_blacklist_author,
"domain_key": domain_key,
"domain_name": extract_domain(feed.url),
"domain_blacklist_enabled": domain_blacklist_enabled,
}
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)