Add domain-wide blacklist and whitelist functionality

This commit is contained in:
Joakim Hellsén 2026-04-12 23:51:05 +02:00
commit bdbd46ebd4
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
14 changed files with 930 additions and 305 deletions

View file

@ -711,15 +711,14 @@ def send_to_discord(reader: Reader | None = None, feed: Feed | None = None, *, d
use_default_message_on_empty=True,
)
# Check if the entry is blacklisted, and if it is, we will skip it.
if entry_should_be_skipped(effective_reader, entry):
logger.info("Entry was blacklisted: %s", entry.id)
continue
# Check if the feed has a whitelist, and if it does, check if the entry is whitelisted.
if has_white_tags(effective_reader, entry.feed) and not should_be_sent(effective_reader, entry):
# Whitelist should take precedence when configured.
if has_white_tags(effective_reader, entry.feed):
if not should_be_sent(effective_reader, entry):
logger.info("Entry was not whitelisted: %s", entry.id)
continue
elif entry_should_be_skipped(effective_reader, entry):
logger.info("Entry was blacklisted: %s", entry.id)
continue
# Use a custom webhook for Hoyolab feeds.
if is_c3kay_feed(entry.feed.url):

View file

@ -2,6 +2,7 @@ from __future__ import annotations
from typing import TYPE_CHECKING
from discord_rss_bot.filter.utils import get_domain_filter_tags
from discord_rss_bot.filter.utils import is_regex_match
from discord_rss_bot.filter.utils import is_word_in_text
@ -11,6 +12,37 @@ if TYPE_CHECKING:
from reader import Reader
_MATCH_FIELDS: tuple[str, ...] = ("title", "summary", "content", "author")
def _get_effective_blacklist_values(reader: Reader, feed: Feed) -> tuple[dict[str, str], dict[str, str]]:
    """Combine the feed's own blacklist tags with the blacklist stored for its domain.

    Args:
        reader: Reader instance the tags are read from.
        feed: Feed whose feed-level and domain-level blacklist values are combined.

    Returns:
        A ``(words, patterns)`` pair of dictionaries keyed by the names in ``_MATCH_FIELDS``.
        Word values are the non-empty feed and domain values joined with ``","``;
        regex values are the non-empty feed and domain values joined with a newline.
    """

    def _clean(raw: object) -> str:
        # Every value is coerced to text and stripped before it is merged.
        return str(raw).strip()

    feed_words: dict[str, str] = {
        name: _clean(reader.get_tag(feed, f"blacklist_{name}", "")) for name in _MATCH_FIELDS
    }
    feed_patterns: dict[str, str] = {
        name: _clean(reader.get_tag(feed, f"regex_blacklist_{name}", "")) for name in _MATCH_FIELDS
    }

    domain_tags: dict[str, str] = get_domain_filter_tags(reader, feed, "domain_blacklist")

    words: dict[str, str] = {}
    patterns: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        domain_word: str = _clean(domain_tags.get(f"blacklist_{name}", ""))
        domain_pattern: str = _clean(domain_tags.get(f"regex_blacklist_{name}", ""))

        # Feed-level value goes first; empty parts are dropped so no stray separators appear.
        words[name] = ",".join(part for part in (feed_words[name], domain_word) if part)
        patterns[name] = "\n".join(part for part in (feed_patterns[name], domain_pattern) if part)

    return words, patterns
def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool:
"""Return True if the feed has blacklist tags.
@ -31,26 +63,8 @@ def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool:
Returns:
bool: If the feed has any of the tags.
"""
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip()
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip()
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip()
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip()
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip()
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip()
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip()
return bool(
blacklist_title
or blacklist_author
or blacklist_content
or blacklist_summary
or regex_blacklist_author
or regex_blacklist_content
or regex_blacklist_summary
or regex_blacklist_title,
)
merged_values, merged_regex_values = _get_effective_blacklist_values(reader, feed)
return any(merged_values.values()) or any(merged_regex_values.values())
def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
@ -63,58 +77,55 @@ def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0
Returns:
bool: If the entry is in the blacklist.
"""
feed = entry.feed
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip()
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip()
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip()
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip()
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip()
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip()
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip()
merged_values, merged_regex_values = _get_effective_blacklist_values(reader, entry.feed)
# TODO(TheLovinator): Also add support for entry_text and more.
# Check regular blacklist
if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title):
if entry.title and merged_values["title"] and is_word_in_text(merged_values["title"], entry.title):
return True
if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary):
if entry.summary and merged_values["summary"] and is_word_in_text(merged_values["summary"], entry.summary):
return True
if (
entry.content
and entry.content[0].value
and blacklist_content
and is_word_in_text(blacklist_content, entry.content[0].value)
and merged_values["content"]
and is_word_in_text(merged_values["content"], entry.content[0].value)
):
return True
if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author):
if entry.author and merged_values["author"] and is_word_in_text(merged_values["author"], entry.author):
return True
if (
entry.content
and entry.content[0].value
and blacklist_content
and is_word_in_text(blacklist_content, entry.content[0].value)
and merged_values["content"]
and is_word_in_text(merged_values["content"], entry.content[0].value)
):
return True
# Check regex blacklist
if entry.title and regex_blacklist_title and is_regex_match(regex_blacklist_title, entry.title):
if entry.title and merged_regex_values["title"] and is_regex_match(merged_regex_values["title"], entry.title):
return True
if entry.summary and regex_blacklist_summary and is_regex_match(regex_blacklist_summary, entry.summary):
if (
entry.summary
and merged_regex_values["summary"]
and is_regex_match(
merged_regex_values["summary"],
entry.summary,
)
):
return True
if (
entry.content
and entry.content[0].value
and regex_blacklist_content
and is_regex_match(regex_blacklist_content, entry.content[0].value)
and merged_regex_values["content"]
and is_regex_match(merged_regex_values["content"], entry.content[0].value)
):
return True
if entry.author and regex_blacklist_author and is_regex_match(regex_blacklist_author, entry.author):
if entry.author and merged_regex_values["author"] and is_regex_match(merged_regex_values["author"], entry.author):
return True
return bool(
entry.content
and entry.content[0].value
and regex_blacklist_content
and is_regex_match(regex_blacklist_content, entry.content[0].value),
and merged_regex_values["content"]
and is_regex_match(merged_regex_values["content"], entry.content[0].value),
)

View file

@ -2,6 +2,14 @@ from __future__ import annotations
import logging
import re
from typing import TYPE_CHECKING
from urllib.parse import urlparse
import tldextract
if TYPE_CHECKING:
from reader import Feed
from reader import Reader
logger: logging.Logger = logging.getLogger(__name__)
@ -70,3 +78,53 @@ def is_regex_match(regex_string: str, text: str) -> bool:
logger.info("No regex patterns matched.")
return False
def get_domain_key(url: str) -> str:
    """Derive the normalized key under which domain-wide filters are stored.

    Args:
        url: The URL to extract the domain from.

    Returns:
        str: The value of tldextract's ``top_domain_under_public_suffix`` for the URL's
        network location (e.g. ``example.com``). When that is empty, the lower-cased
        network location without a leading ``www.`` is returned instead. An empty string
        is returned when *url* is empty or has no network location.
    """
    if not url:
        return ""

    # Lower-case and trim the network location, then drop a leading "www." before extraction.
    netloc: str = urlparse(url).netloc.lower().strip().removeprefix("www.")
    if not netloc:
        return ""

    registered: str = tldextract.extract(netloc).top_domain_under_public_suffix
    if registered:
        return registered
    return netloc
def get_domain_filter_tags(reader: Reader, feed: Feed, tag_name: str) -> dict[str, str]:
    """Look up the domain-wide filter values that apply to a feed.

    Args:
        reader: Reader instance.
        feed: Feed instance; its URL is turned into a domain key via ``get_domain_key``.
        tag_name: Global tag name that stores domain filters.

    Returns:
        dict[str, str]: Filter values stored under the feed's domain key, with keys and
        values converted to ``str``. Empty when there is no domain key, or when the global
        tag or the entry for this domain is not a dictionary.
    """
    key: str = get_domain_key(str(feed.url))
    if not key:
        return {}

    all_filters: object = reader.get_tag((), tag_name, {})
    per_domain: object = all_filters.get(key, {}) if isinstance(all_filters, dict) else None
    if not isinstance(per_domain, dict):
        return {}

    result: dict[str, str] = {}
    for name, raw in per_domain.items():
        # Entries with non-string keys are ignored.
        if isinstance(name, str):
            result[str(name)] = str(raw)
    return result

View file

@ -2,6 +2,7 @@ from __future__ import annotations
from typing import TYPE_CHECKING
from discord_rss_bot.filter.utils import get_domain_filter_tags
from discord_rss_bot.filter.utils import is_regex_match
from discord_rss_bot.filter.utils import is_word_in_text
@ -11,6 +12,37 @@ if TYPE_CHECKING:
from reader import Reader
_MATCH_FIELDS: tuple[str, ...] = ("title", "summary", "content", "author")
def _get_effective_whitelist_values(reader: Reader, feed: Feed) -> tuple[dict[str, str], dict[str, str]]:
    """Combine the feed's own whitelist tags with the whitelist stored for its domain.

    Args:
        reader: Reader instance the tags are read from.
        feed: Feed whose feed-level and domain-level whitelist values are combined.

    Returns:
        A ``(words, patterns)`` pair of dictionaries keyed by the names in ``_MATCH_FIELDS``.
        Word values are the non-empty feed and domain values joined with ``","``;
        regex values are the non-empty feed and domain values joined with a newline.
    """
    feed_words: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        feed_words[name] = str(reader.get_tag(feed, f"whitelist_{name}", "")).strip()

    feed_patterns: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        feed_patterns[name] = str(reader.get_tag(feed, f"regex_whitelist_{name}", "")).strip()

    domain_tags: dict[str, str] = get_domain_filter_tags(reader, feed, "domain_whitelist")

    words: dict[str, str] = {}
    patterns: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        domain_word: str = str(domain_tags.get(f"whitelist_{name}", "")).strip()
        domain_pattern: str = str(domain_tags.get(f"regex_whitelist_{name}", "")).strip()

        # filter(None, ...) drops empty strings, so a missing side adds no separator.
        words[name] = ",".join(filter(None, (feed_words[name], domain_word)))
        patterns[name] = "\n".join(filter(None, (feed_patterns[name], domain_pattern)))

    return words, patterns
def has_white_tags(reader: Reader, feed: Feed) -> bool:
"""Return True if the feed has whitelist tags.
@ -31,26 +63,8 @@ def has_white_tags(reader: Reader, feed: Feed) -> bool:
Returns:
bool: If the feed has any of the tags.
"""
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip()
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip()
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip()
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip()
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip()
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip()
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip()
return bool(
whitelist_title
or whitelist_author
or whitelist_content
or whitelist_summary
or regex_whitelist_author
or regex_whitelist_content
or regex_whitelist_summary
or regex_whitelist_title,
)
merged_values, merged_regex_values = _get_effective_whitelist_values(reader, feed)
return any(merged_values.values()) or any(merged_regex_values.values())
def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
@ -63,44 +77,40 @@ def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
Returns:
bool: If the entry is in the whitelist.
"""
feed: Feed = entry.feed
# Regular whitelist tags
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip()
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip()
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip()
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip()
# Regex whitelist tags
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip()
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip()
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip()
merged_values, merged_regex_values = _get_effective_whitelist_values(reader, entry.feed)
# Check regular whitelist
if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title):
if entry.title and merged_values["title"] and is_word_in_text(merged_values["title"], entry.title):
return True
if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary):
if entry.summary and merged_values["summary"] and is_word_in_text(merged_values["summary"], entry.summary):
return True
if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author):
if entry.author and merged_values["author"] and is_word_in_text(merged_values["author"], entry.author):
return True
if (
entry.content
and entry.content[0].value
and whitelist_content
and is_word_in_text(whitelist_content, entry.content[0].value)
and merged_values["content"]
and is_word_in_text(merged_values["content"], entry.content[0].value)
):
return True
# Check regex whitelist
if entry.title and regex_whitelist_title and is_regex_match(regex_whitelist_title, entry.title):
if entry.title and merged_regex_values["title"] and is_regex_match(merged_regex_values["title"], entry.title):
return True
if entry.summary and regex_whitelist_summary and is_regex_match(regex_whitelist_summary, entry.summary):
if (
entry.summary
and merged_regex_values["summary"]
and is_regex_match(
merged_regex_values["summary"],
entry.summary,
)
):
return True
if entry.author and regex_whitelist_author and is_regex_match(regex_whitelist_author, entry.author):
if entry.author and merged_regex_values["author"] and is_regex_match(merged_regex_values["author"], entry.author):
return True
return bool(
entry.content
and entry.content[0].value
and regex_whitelist_content
and is_regex_match(regex_whitelist_content, entry.content[0].value),
and merged_regex_values["content"]
and is_regex_match(merged_regex_values["content"], entry.content[0].value),
)

View file

@ -28,7 +28,7 @@ import shutil
import subprocess # noqa: S404
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import cast
if TYPE_CHECKING:
from reader import Reader
@ -37,11 +37,8 @@ logger: logging.Logger = logging.getLogger(__name__)
GIT_EXECUTABLE: str = shutil.which("git") or "git"
type TAG_VALUE = (
dict[str, str | int | float | bool | dict[str, Any] | list[Any] | None]
| list[str | int | float | bool | dict[str, Any] | list[Any] | None]
| None
)
type JsonScalar = str | int | float | bool | None
type JsonLike = JsonScalar | dict[str, JsonLike] | list[JsonLike]
# Tags that are exported per-feed (empty values are omitted).
_FEED_TAGS: tuple[str, ...] = (
@ -157,47 +154,68 @@ def setup_backup_repo(backup_path: Path) -> bool:
return True
def export_state(reader: Reader, backup_path: Path) -> None:
"""Serialise the current bot state to ``state.json`` inside *backup_path*.
def _build_feed_state(reader: Reader) -> list[JsonLike]:
"""Collect feed and per-feed tag state.
Args:
reader: The :class:`reader.Reader` instance to read state from.
backup_path: Destination directory for the exported ``state.json``.
Returns:
A list of dictionaries containing feed URLs and their associated tag values.
"""
feeds_state: list[dict] = []
feeds_state: list[JsonLike] = []
for feed in reader.get_feeds():
feed_data: dict = {"url": feed.url}
feed_data: dict[str, JsonLike] = {"url": feed.url}
for tag in _FEED_TAGS:
try:
value: TAG_VALUE = reader.get_tag(feed, tag, None)
value: JsonLike | None = cast("JsonLike | None", reader.get_tag(feed, tag, None))
if value is not None and value != "": # noqa: PLC1901
feed_data[tag] = value
except Exception:
logger.exception("Failed to read tag '%s' for feed '%s' during state export", tag, feed.url)
feeds_state.append(feed_data)
return feeds_state
webhooks: list[str | int | float | bool | dict[str, Any] | list[Any] | None] = list(
reader.get_tag((), "webhooks", []),
)
# Export global update interval if set
global_update_interval: dict[str, Any] | None = None
global_update_config = reader.get_tag((), ".reader.update", None)
if isinstance(global_update_config, dict):
global_update_interval = global_update_config
def _get_global_dict_tag(reader: Reader, tag_name: str) -> dict[str, JsonLike] | None:
    """Fetch a global tag and hand it back only when it holds a dictionary.

    Args:
        reader: Reader instance to query.
        tag_name: Name of the global tag to look up.

    Returns:
        The tag value when it is a ``dict``; ``None`` otherwise (``None`` is also the
        default used when the tag lookup yields nothing).
    """
    raw: JsonLike | None = cast("JsonLike | None", reader.get_tag((), tag_name, None))
    if isinstance(raw, dict):
        return raw
    return None
global_screenshot_layout: str | None = None
screenshot_layout = reader.get_tag((), "screenshot_layout", None)
if isinstance(screenshot_layout, str):
clean_layout = screenshot_layout.strip().lower()
if clean_layout in {"desktop", "mobile"}:
global_screenshot_layout = clean_layout
state: dict = {"feeds": feeds_state, "webhooks": webhooks}
def _get_global_screenshot_layout(reader: Reader) -> str | None:
    """Read the global ``screenshot_layout`` tag and normalize it.

    Args:
        reader: Reader instance to query.

    Returns:
        ``"desktop"`` or ``"mobile"`` when the stored value is a string that matches one of
        them after stripping whitespace and lower-casing; ``None`` in every other case.
    """
    raw: JsonLike | None = cast("JsonLike | None", reader.get_tag((), "screenshot_layout", None))
    if isinstance(raw, str):
        normalized: str = raw.strip().lower()
        if normalized in {"desktop", "mobile"}:
            return normalized
    return None
def export_state(reader: Reader, backup_path: Path) -> None:
    """Write the current bot state as JSON to ``state.json`` inside *backup_path*.

    The exported object always contains ``feeds`` and ``webhooks``. The keys
    ``global_update_interval``, ``global_screenshot_layout``, ``domain_blacklist`` and
    ``domain_whitelist`` are added only when their value is not ``None``.

    Args:
        reader: The :class:`reader.Reader` instance to read state from.
        backup_path: Destination directory for the exported ``state.json``.
    """
    feeds_state: list[JsonLike] = _build_feed_state(reader)

    stored_webhooks: JsonLike | None = cast("JsonLike | None", reader.get_tag((), "webhooks", []))
    # Anything other than a list is exported as an empty webhook list.
    webhooks: list[JsonLike] = stored_webhooks if isinstance(stored_webhooks, list) else []

    # Optional sections, listed in the order they should appear in the exported file.
    optional_sections: dict[str, JsonLike | None] = {
        "global_update_interval": _get_global_dict_tag(reader, ".reader.update"),
        "global_screenshot_layout": _get_global_screenshot_layout(reader),
        "domain_blacklist": _get_global_dict_tag(reader, "domain_blacklist"),
        "domain_whitelist": _get_global_dict_tag(reader, "domain_whitelist"),
    }

    state: dict[str, JsonLike] = {"feeds": feeds_state, "webhooks": webhooks}
    for section_name, section_value in optional_sections.items():
        if section_value is not None:
            state[section_name] = section_value

    state_file: Path = backup_path / "state.json"
    state_file.write_text(json.dumps(state, indent=2, default=str), encoding="utf-8")

View file

@ -54,6 +54,7 @@ from discord_rss_bot.feeds import get_feed_delivery_mode
from discord_rss_bot.feeds import get_screenshot_layout
from discord_rss_bot.feeds import send_entry_to_discord
from discord_rss_bot.feeds import send_to_discord
from discord_rss_bot.filter.utils import get_domain_key
from discord_rss_bot.git_backup import commit_state_change
from discord_rss_bot.git_backup import get_backup_path
from discord_rss_bot.is_url_valid import is_url_valid
@ -399,6 +400,7 @@ async def post_unpause_feed(
@app.post("/whitelist")
async def post_set_whitelist(
reader: Annotated[Reader, Depends(get_reader_dependency)],
*,
whitelist_title: Annotated[str, Form()] = "",
whitelist_summary: Annotated[str, Form()] = "",
whitelist_content: Annotated[str, Form()] = "",
@ -407,6 +409,7 @@ async def post_set_whitelist(
regex_whitelist_summary: Annotated[str, Form()] = "",
regex_whitelist_content: Annotated[str, Form()] = "",
regex_whitelist_author: Annotated[str, Form()] = "",
apply_to_domain: Annotated[bool, Form()] = False,
feed_url: Annotated[str, Form()] = "",
) -> RedirectResponse:
"""Set what the whitelist should be sent, if you have this set only words in the whitelist will be sent.
@ -420,6 +423,7 @@ async def post_set_whitelist(
regex_whitelist_summary: Whitelisted regex for when checking the summary.
regex_whitelist_content: Whitelisted regex for when checking the content.
regex_whitelist_author: Whitelisted regex for when checking the author.
apply_to_domain: Also store these values as domain-wide whitelist rules.
feed_url: The feed we should set the whitelist for.
reader: The Reader instance.
@ -427,16 +431,43 @@ async def post_set_whitelist(
RedirectResponse: Redirect to the feed page.
"""
clean_feed_url: str = feed_url.strip() if feed_url else ""
reader.set_tag(clean_feed_url, "whitelist_title", whitelist_title) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "whitelist_summary", whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "whitelist_content", whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "whitelist_author", whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_whitelist_title", regex_whitelist_title) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_whitelist_summary", regex_whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_whitelist_content", regex_whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_whitelist_author", regex_whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
whitelist_values: dict[str, str] = {
"whitelist_title": whitelist_title.strip(),
"whitelist_summary": whitelist_summary.strip(),
"whitelist_content": whitelist_content.strip(),
"whitelist_author": whitelist_author.strip(),
"regex_whitelist_title": regex_whitelist_title.strip(),
"regex_whitelist_summary": regex_whitelist_summary.strip(),
"regex_whitelist_content": regex_whitelist_content.strip(),
"regex_whitelist_author": regex_whitelist_author.strip(),
}
commit_state_change(reader, f"Update whitelist for {clean_feed_url}")
for tag, value in whitelist_values.items():
reader.set_tag(clean_feed_url, tag, value) # pyright: ignore[reportArgumentType][call-overload]
message: str = f"Update whitelist for {clean_feed_url}"
if apply_to_domain:
domain_key: str = get_domain_key(clean_feed_url)
if domain_key:
domain_whitelists_raw = reader.get_tag((), "domain_whitelist", {})
domain_whitelists: dict[str, dict[str, str]] = {}
if isinstance(domain_whitelists_raw, dict):
for existing_domain, existing_values in domain_whitelists_raw.items():
if isinstance(existing_domain, str) and isinstance(existing_values, dict):
domain_whitelists[existing_domain] = {
str(key): str(value) for key, value in existing_values.items() if isinstance(key, str)
}
domain_values: dict[str, str] = {k: v for k, v in whitelist_values.items() if v}
if domain_values:
domain_whitelists[domain_key] = domain_values
else:
domain_whitelists.pop(domain_key, None)
reader.set_tag((), "domain_whitelist", domain_whitelists) # pyright: ignore[reportArgumentType]
message = f"Update whitelist for {clean_feed_url} and domain {domain_key}"
commit_state_change(reader, message)
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
@ -468,6 +499,11 @@ async def get_whitelist(
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", ""))
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", ""))
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", ""))
domain_key: str = get_domain_key(feed.url)
domain_whitelist_raw = reader.get_tag((), "domain_whitelist", {})
domain_whitelist_enabled: bool = bool(
isinstance(domain_whitelist_raw, dict) and domain_key and domain_key in domain_whitelist_raw,
)
context = {
"request": request,
@ -480,6 +516,9 @@ async def get_whitelist(
"regex_whitelist_summary": regex_whitelist_summary,
"regex_whitelist_content": regex_whitelist_content,
"regex_whitelist_author": regex_whitelist_author,
"domain_key": domain_key,
"domain_name": extract_domain(feed.url),
"domain_whitelist_enabled": domain_whitelist_enabled,
}
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
@ -487,6 +526,7 @@ async def get_whitelist(
@app.post("/blacklist")
async def post_set_blacklist(
reader: Annotated[Reader, Depends(get_reader_dependency)],
*,
blacklist_title: Annotated[str, Form()] = "",
blacklist_summary: Annotated[str, Form()] = "",
blacklist_content: Annotated[str, Form()] = "",
@ -495,6 +535,7 @@ async def post_set_blacklist(
regex_blacklist_summary: Annotated[str, Form()] = "",
regex_blacklist_content: Annotated[str, Form()] = "",
regex_blacklist_author: Annotated[str, Form()] = "",
apply_to_domain: Annotated[bool, Form()] = False,
feed_url: Annotated[str, Form()] = "",
) -> RedirectResponse:
"""Set the blacklist.
@ -511,6 +552,7 @@ async def post_set_blacklist(
regex_blacklist_summary: Blacklisted regex for when checking the summary.
regex_blacklist_content: Blacklisted regex for when checking the content.
regex_blacklist_author: Blacklisted regex for when checking the author.
apply_to_domain: Also store these values as domain-wide blacklist rules.
feed_url: What feed we should set the blacklist for.
reader: The Reader instance.
@ -518,15 +560,43 @@ async def post_set_blacklist(
RedirectResponse: Redirect to the feed page.
"""
clean_feed_url: str = feed_url.strip() if feed_url else ""
reader.set_tag(clean_feed_url, "blacklist_title", blacklist_title) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "blacklist_summary", blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "blacklist_content", blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "blacklist_author", blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_blacklist_title", regex_blacklist_title) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_blacklist_summary", regex_blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_blacklist_content", regex_blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
reader.set_tag(clean_feed_url, "regex_blacklist_author", regex_blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
commit_state_change(reader, f"Update blacklist for {clean_feed_url}")
blacklist_values: dict[str, str] = {
"blacklist_title": blacklist_title.strip(),
"blacklist_summary": blacklist_summary.strip(),
"blacklist_content": blacklist_content.strip(),
"blacklist_author": blacklist_author.strip(),
"regex_blacklist_title": regex_blacklist_title.strip(),
"regex_blacklist_summary": regex_blacklist_summary.strip(),
"regex_blacklist_content": regex_blacklist_content.strip(),
"regex_blacklist_author": regex_blacklist_author.strip(),
}
for tag, value in blacklist_values.items():
reader.set_tag(clean_feed_url, tag, value) # pyright: ignore[reportArgumentType][call-overload]
message: str = f"Update blacklist for {clean_feed_url}"
if apply_to_domain:
domain_key: str = get_domain_key(clean_feed_url)
if domain_key:
domain_blacklists_raw = reader.get_tag((), "domain_blacklist", {})
domain_blacklists: dict[str, dict[str, str]] = {}
if isinstance(domain_blacklists_raw, dict):
for existing_domain, existing_values in domain_blacklists_raw.items():
if isinstance(existing_domain, str) and isinstance(existing_values, dict):
domain_blacklists[existing_domain] = {
str(key): str(value) for key, value in existing_values.items() if isinstance(key, str)
}
domain_values: dict[str, str] = {k: v for k, v in blacklist_values.items() if v}
if domain_values:
domain_blacklists[domain_key] = domain_values
else:
domain_blacklists.pop(domain_key, None)
reader.set_tag((), "domain_blacklist", domain_blacklists) # pyright: ignore[reportArgumentType]
message = f"Update blacklist for {clean_feed_url} and domain {domain_key}"
commit_state_change(reader, message)
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
@ -556,6 +626,11 @@ async def get_blacklist(
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", ""))
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", ""))
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", ""))
domain_key: str = get_domain_key(feed.url)
domain_blacklist_raw = reader.get_tag((), "domain_blacklist", {})
domain_blacklist_enabled: bool = bool(
isinstance(domain_blacklist_raw, dict) and domain_key and domain_key in domain_blacklist_raw,
)
context = {
"request": request,
@ -568,6 +643,9 @@ async def get_blacklist(
"regex_blacklist_summary": regex_blacklist_summary,
"regex_blacklist_content": regex_blacklist_content,
"regex_blacklist_author": regex_blacklist_author,
"domain_key": domain_key,
"domain_name": extract_domain(feed.url),
"domain_blacklist_enabled": domain_blacklist_enabled,
}
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)

View file

@ -1,9 +1,9 @@
{% extends "base.html" %}
{% block title %}
| Blacklist
| Blacklist
{% endblock title %}
{% block content %}
<div class="p-2 border border-dark">
<div class="p-2 border border-dark">
<form action="/blacklist" method="post">
<!-- Feed URL -->
<div class="row pb-2">
@ -30,19 +30,33 @@
</li>
</ul>
</div>
{% if domain_blacklist_enabled %}
<div class="alert alert-info py-2" role="alert">Domain-wide blacklist is enabled for {{ domain_key }}.</div>
{% endif %}
<label for="blacklist_title" class="col-sm-6 col-form-label">Blacklist - Title</label>
<input name="blacklist_title" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_title" value="{%- if blacklist_title -%}{{ blacklist_title }}{%- endif -%}" />
<input name="blacklist_title"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_title"
value="{%- if blacklist_title -%}{{ blacklist_title }}{%- endif -%}" />
<label for="blacklist_summary" class="col-sm-6 col-form-label">Blacklist - Summary</label>
<input name="blacklist_summary" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_summary" value="{%- if blacklist_summary -%}{{ blacklist_summary }}{%- endif -%}" />
<input name="blacklist_summary"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_summary"
value="{%- if blacklist_summary -%}{{ blacklist_summary }}{%- endif -%}" />
<label for="blacklist_content" class="col-sm-6 col-form-label">Blacklist - Content</label>
<input name="blacklist_content" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_content" value="{%- if blacklist_content -%}{{ blacklist_content }}{%- endif -%}" />
<input name="blacklist_content"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_content"
value="{%- if blacklist_content -%}{{ blacklist_content }}{%- endif -%}" />
<label for="blacklist_author" class="col-sm-6 col-form-label">Blacklist - Author</label>
<input name="blacklist_author" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_author" value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
<input name="blacklist_author"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_author"
value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
<div class="mt-4">
<div class="form-text">
<ul class="list-inline">
@ -54,7 +68,7 @@
<li>
Examples:
<code>
<pre>
<pre>
^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*
@ -64,27 +78,42 @@
</ul>
</div>
<label for="regex_blacklist_title" class="col-sm-6 col-form-label">Regex Blacklist - Title</label>
<textarea name="regex_blacklist_title" class="form-control bg-dark border-dark text-muted"
<textarea name="regex_blacklist_title"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_title"
rows="3">{%- if regex_blacklist_title -%}{{ regex_blacklist_title }}{%- endif -%}</textarea>
<label for="regex_blacklist_summary" class="col-sm-6 col-form-label">Regex Blacklist -
Summary</label>
<textarea name="regex_blacklist_summary" class="form-control bg-dark border-dark text-muted"
<label for="regex_blacklist_summary" class="col-sm-6 col-form-label">
Regex Blacklist -
Summary
</label>
<textarea name="regex_blacklist_summary"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_summary"
rows="3">{%- if regex_blacklist_summary -%}{{ regex_blacklist_summary }}{%- endif -%}</textarea>
<label for="regex_blacklist_content" class="col-sm-6 col-form-label">Regex Blacklist -
Content</label>
<textarea name="regex_blacklist_content" class="form-control bg-dark border-dark text-muted"
<label for="regex_blacklist_content" class="col-sm-6 col-form-label">
Regex Blacklist -
Content
</label>
<textarea name="regex_blacklist_content"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_content"
rows="3">{%- if regex_blacklist_content -%}{{ regex_blacklist_content }}{%- endif -%}</textarea>
<label for="regex_blacklist_author" class="col-sm-6 col-form-label">Regex Blacklist - Author</label>
<textarea name="regex_blacklist_author" class="form-control bg-dark border-dark text-muted"
<textarea name="regex_blacklist_author"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_author"
rows="3">{%- if regex_blacklist_author -%}{{ regex_blacklist_author }}{%- endif -%}</textarea>
</div>
<div class="form-check mt-3">
<input class="form-check-input"
type="checkbox"
value="true"
id="apply_to_domain"
name="apply_to_domain">
<label class="form-check-label" for="apply_to_domain">
Apply these blacklist values to all feeds on {{ domain_name }} ({{ domain_key }})
</label>
</div>
</div>
</div>
<!-- Add a hidden feed_url field to the form -->
@ -94,5 +123,5 @@
<button class="btn btn-dark btn-sm">Update blacklist</button>
</div>
</form>
</div>
</div>
{% endblock content %}

View file

@ -59,8 +59,16 @@
<!-- Display domains and their feeds -->
{% for domain, domain_feeds in domains.items() %}
<div class="card bg-dark border border-dark mb-2">
<div class="card-header">
<div class="card-header d-flex justify-content-between align-items-center gap-2">
<h3 class="h6 mb-0 text-white-50">{{ domain }} ({{ domain_feeds|length }})</h3>
{% if domain_feeds %}
<div class="d-flex gap-2">
<a class="btn btn-outline-light btn-sm"
href="/whitelist?feed_url={{ domain_feeds[0].url|encode_url }}">Domain whitelist</a>
<a class="btn btn-outline-light btn-sm"
href="/blacklist?feed_url={{ domain_feeds[0].url|encode_url }}">Domain blacklist</a>
</div>
{% endif %}
</div>
<div class="card-body p-2">
<ul class="list-group list-unstyled mb-0">

View file

@ -1,9 +1,9 @@
{% extends "base.html" %}
{% block title %}
| Whitelist
| Whitelist
{% endblock title %}
{% block content %}
<div class="p-2 border border-dark">
<div class="p-2 border border-dark">
<form action="/whitelist" method="post">
<!-- Feed URL -->
<div class="row pb-2">
@ -30,19 +30,33 @@
</li>
</ul>
</div>
{% if domain_whitelist_enabled %}
<div class="alert alert-info py-2" role="alert">Domain-wide whitelist is enabled for {{ domain_key }}.</div>
{% endif %}
<label for="whitelist_title" class="col-sm-6 col-form-label">Whitelist - Title</label>
<input name="whitelist_title" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_title" value="{%- if whitelist_title -%}{{ whitelist_title }} {%- endif -%}" />
<input name="whitelist_title"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_title"
value="{%- if whitelist_title -%}{{ whitelist_title }} {%- endif -%}" />
<label for="whitelist_summary" class="col-sm-6 col-form-label">Whitelist - Summary</label>
<input name="whitelist_summary" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_summary" value="{%- if whitelist_summary -%}{{ whitelist_summary }}{%- endif -%}" />
<input name="whitelist_summary"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_summary"
value="{%- if whitelist_summary -%}{{ whitelist_summary }}{%- endif -%}" />
<label for="whitelist_content" class="col-sm-6 col-form-label">Whitelist - Content</label>
<input name="whitelist_content" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_content" value="{%- if whitelist_content -%}{{ whitelist_content }}{%- endif -%}" />
<input name="whitelist_content"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_content"
value="{%- if whitelist_content -%}{{ whitelist_content }}{%- endif -%}" />
<label for="whitelist_author" class="col-sm-6 col-form-label">Whitelist - Author</label>
<input name="whitelist_author" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_author" value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
<input name="whitelist_author"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_author"
value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
<div class="mt-4">
<div class="form-text">
<ul class="list-inline">
@ -54,7 +68,7 @@
<li>
Examples:
<code>
<pre>
<pre>
^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*
@ -64,27 +78,42 @@
</ul>
</div>
<label for="regex_whitelist_title" class="col-sm-6 col-form-label">Regex Whitelist - Title</label>
<textarea name="regex_whitelist_title" class="form-control bg-dark border-dark text-muted"
<textarea name="regex_whitelist_title"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_title"
rows="3">{%- if regex_whitelist_title -%}{{ regex_whitelist_title }}{%- endif -%}</textarea>
<label for="regex_whitelist_summary" class="col-sm-6 col-form-label">Regex Whitelist -
Summary</label>
<textarea name="regex_whitelist_summary" class="form-control bg-dark border-dark text-muted"
<label for="regex_whitelist_summary" class="col-sm-6 col-form-label">
Regex Whitelist -
Summary
</label>
<textarea name="regex_whitelist_summary"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_summary"
rows="3">{%- if regex_whitelist_summary -%}{{ regex_whitelist_summary }}{%- endif -%}</textarea>
<label for="regex_whitelist_content" class="col-sm-6 col-form-label">Regex Whitelist -
Content</label>
<textarea name="regex_whitelist_content" class="form-control bg-dark border-dark text-muted"
<label for="regex_whitelist_content" class="col-sm-6 col-form-label">
Regex Whitelist -
Content
</label>
<textarea name="regex_whitelist_content"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_content"
rows="3">{%- if regex_whitelist_content -%}{{ regex_whitelist_content }}{%- endif -%}</textarea>
<label for="regex_whitelist_author" class="col-sm-6 col-form-label">Regex Whitelist - Author</label>
<textarea name="regex_whitelist_author" class="form-control bg-dark border-dark text-muted"
<textarea name="regex_whitelist_author"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_author"
rows="3">{%- if regex_whitelist_author -%}{{ regex_whitelist_author }}{%- endif -%}</textarea>
</div>
<div class="form-check mt-3">
<input class="form-check-input"
type="checkbox"
value="true"
id="apply_to_domain"
name="apply_to_domain">
<label class="form-check-label" for="apply_to_domain">
Apply these whitelist values to all feeds on {{ domain_name }} ({{ domain_key }})
</label>
</div>
</div>
</div>
<!-- Add a hidden feed_url field to the form -->
@ -94,5 +123,5 @@
<button class="btn btn-dark btn-sm">Update whitelist</button>
</div>
</form>
</div>
</div>
{% endblock content %}

View file

@ -203,3 +203,33 @@ def test_regex_should_be_skipped() -> None:
)
reader.delete_tag(feed, "regex_blacklist_author")
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
def test_domain_blacklist_should_be_skipped() -> None:
    """Domain-wide blacklist should apply to feeds on the same domain.

    The feed itself carries no blacklist tags; the only filter configuration
    is stored under the ``domain_blacklist`` tag on the ``()`` resource, keyed
    by the feed's domain.
    """
    reader: Reader = get_reader()

    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    first_entry: Entry | None = next(iter(entries), None)
    assert first_entry is not None, "Expected at least one entry"

    # Baseline: nothing is configured yet, so nothing may be filtered.
    assert feed_has_blacklist_tags(reader, feed) is False, "Feed should not have blacklist tags"
    assert entry_should_be_skipped(reader, first_entry) is False, "Entry should not be skipped"

    reader.set_tag(
        (),
        "domain_blacklist",
        {
            "lovinator.space": {
                "blacklist_author": "TheLovinator",
                # In a raw string a single backslash is enough for the regex
                # word class; a doubled one would require a literal backslash
                # in the title and the pattern could never match.
                "regex_blacklist_title": r"fvnnn\w+",
            },
        },
    )  # pyright: ignore[reportArgumentType]

    assert feed_has_blacklist_tags(reader, feed) is True, "Domain blacklist should count as blacklist tags"
    assert entry_should_be_skipped(reader, first_entry) is True, "Entry should be skipped by domain blacklist"

View file

@ -3,6 +3,8 @@ from __future__ import annotations
import asyncio
import os
import tempfile
from datetime import UTC
from datetime import datetime
from pathlib import Path
from typing import LiteralString
from unittest.mock import MagicMock
@ -190,6 +192,67 @@ def test_get_entry_delivery_mode_falls_back_to_legacy_embed_flag() -> None:
assert result == "text"
@patch("discord_rss_bot.feeds.execute_webhook")
@patch("discord_rss_bot.feeds.create_text_webhook")
@patch("discord_rss_bot.feeds.should_be_sent", return_value=True)
@patch("discord_rss_bot.feeds.has_white_tags", return_value=True)
@patch("discord_rss_bot.feeds.entry_should_be_skipped", return_value=True)
def test_send_to_discord_whitelist_precedence_over_blacklist(
    mock_entry_should_be_skipped: MagicMock,
    mock_has_white_tags: MagicMock,
    mock_should_be_sent: MagicMock,
    mock_create_text_webhook: MagicMock,
    mock_execute_webhook: MagicMock,
) -> None:
    """A whitelisted entry is delivered even though the blacklist would match it.

    Both filters are patched to report a match. The test then verifies that
    only the whitelist helpers are consulted, that the blacklist helper is
    never called, and that the webhook is executed exactly once.
    """
    # Tag values the mocked reader hands out; every other key yields the default.
    canned_tags: dict[str, str] = {
        "webhook": "https://discord.test/webhook",
        "delivery_mode": "text",
    }

    def get_tag_side_effect(
        resource: str | Feed,
        key: str,
        default: str | None = None,
    ) -> str | None:
        """Stand in for ``reader.get_tag``.

        Args:
            resource: The resource the tag is requested for (ignored).
            key: The tag key being requested.
            default: Value returned when the key has no canned value.

        Returns:
            The canned value for ``key`` if there is one, otherwise ``default``.
        """
        return canned_tags.get(key, default)

    source_feed = MagicMock()
    source_feed.url = "https://example.com/feed.xml"

    pending_entry = MagicMock()
    pending_entry.id = "entry-1"
    pending_entry.feed = source_feed
    pending_entry.feed_url = source_feed.url
    pending_entry.added = datetime.now(tz=UTC)

    stub_reader = MagicMock()
    stub_reader.get_entries.return_value = [pending_entry]
    stub_reader.get_tag.side_effect = get_tag_side_effect

    built_webhook = MagicMock()
    mock_create_text_webhook.return_value = built_webhook

    send_to_discord(reader=stub_reader, feed=source_feed, do_once=True)

    mock_has_white_tags.assert_called_once_with(stub_reader, source_feed)
    mock_should_be_sent.assert_called_once_with(stub_reader, pending_entry)
    mock_entry_should_be_skipped.assert_not_called()
    mock_execute_webhook.assert_called_once_with(built_webhook, pending_entry, reader=stub_reader)
@patch("discord_rss_bot.feeds.execute_webhook")
@patch("discord_rss_bot.feeds.create_text_webhook")
@patch("discord_rss_bot.feeds.create_hoyolab_webhook")

View file

@ -173,9 +173,11 @@ def test_export_state_creates_state_json(tmp_path: Path) -> None:
tag: str | None = None,
default: str | None = None,
) -> list[Any] | str | None:
if feed_or_key == () and tag is None:
# Called for global webhooks list
return []
if feed_or_key == () and tag == "domain_blacklist":
return {"example.com": {"blacklist_title": "spoiler"}}
if feed_or_key == () and tag == "domain_whitelist":
return {"example.com": {"whitelist_title": "release"}}
if tag == "webhook":
return "https://discord.com/api/webhooks/123/abc"
@ -194,6 +196,8 @@ def test_export_state_creates_state_json(tmp_path: Path) -> None:
data: dict[str, Any] = json.loads(state_file.read_text(encoding="utf-8"))
assert "feeds" in data
assert "webhooks" in data
assert data["domain_blacklist"]["example.com"]["blacklist_title"] == "spoiler"
assert data["domain_whitelist"]["example.com"]["whitelist_title"] == "release"
assert data["feeds"][0]["url"] == "https://example.com/feed.rss"
assert data["feeds"][0]["webhook"] == "https://discord.com/api/webhooks/123/abc"

View file

@ -221,6 +221,264 @@ def test_get() -> None:
assert response.status_code == 200, f"/whitelist failed: {response.text}"
def test_post_blacklist_apply_to_domain_updates_global_domain_blacklist() -> None:
    """POST /blacklist with ``apply_to_domain`` stores the values in the domain-wide blacklist."""
    reader: Reader = get_reader_dependency()

    # Recreate the webhook and the feed so the form post has something to act on.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_response: Response = client.post(
        url="/add",
        data={"feed_url": feed_url, "webhook_dropdown": webhook_name},
    )
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    form_data: dict[str, str] = {
        "feed_url": feed_url,
        "blacklist_author": "TheLovinator",
        "apply_to_domain": "true",
    }
    blacklist_response: Response = client.post(url="/blacklist", data=form_data)
    assert blacklist_response.status_code == 200, f"Failed to post blacklist: {blacklist_response.text}"

    # The submitted value must now be stored under the feed's domain key.
    stored = reader.get_tag((), "domain_blacklist", {})
    assert isinstance(stored, dict), "domain_blacklist should be a dict"
    assert "lovinator.space" in stored, "Expected domain key in domain_blacklist"
    assert stored["lovinator.space"]["blacklist_author"] == "TheLovinator"
def test_post_whitelist_apply_to_domain_updates_global_domain_whitelist() -> None:
    """POST /whitelist with ``apply_to_domain`` stores the values in the domain-wide whitelist."""
    reader: Reader = get_reader_dependency()

    # Recreate the webhook and the feed so the form post has something to act on.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_response: Response = client.post(
        url="/add",
        data={"feed_url": feed_url, "webhook_dropdown": webhook_name},
    )
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    form_data: dict[str, str] = {
        "feed_url": feed_url,
        "whitelist_author": "TheLovinator",
        "apply_to_domain": "true",
    }
    whitelist_response: Response = client.post(url="/whitelist", data=form_data)
    assert whitelist_response.status_code == 200, f"Failed to post whitelist: {whitelist_response.text}"

    # The submitted value must now be stored under the feed's domain key.
    stored = reader.get_tag((), "domain_whitelist", {})
    assert isinstance(stored, dict), "domain_whitelist should be a dict"
    assert "lovinator.space" in stored, "Expected domain key in domain_whitelist"
    assert stored["lovinator.space"]["whitelist_author"] == "TheLovinator"
def test_domain_filter_pages_show_domain_enabled_notice() -> None:
    """The blacklist and whitelist pages announce an active domain-wide filter."""
    reader: Reader = get_reader_dependency()

    # Recreate the webhook and the feed the pages are rendered for.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_response: Response = client.post(
        url="/add",
        data={"feed_url": feed_url, "webhook_dropdown": webhook_name},
    )
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    # Configure both domain-wide filters directly on the reader.
    reader.set_tag(
        (),
        "domain_blacklist",
        {"lovinator.space": {"blacklist_title": "spoiler"}},
    )  # pyright: ignore[reportArgumentType]
    reader.set_tag(
        (),
        "domain_whitelist",
        {"lovinator.space": {"whitelist_title": "release"}},
    )  # pyright: ignore[reportArgumentType]

    # Each filter page must render and contain its own notice.
    expected_notices: tuple[tuple[str, str], ...] = (
        ("/blacklist", "Domain-wide blacklist is enabled for lovinator.space."),
        ("/whitelist", "Domain-wide whitelist is enabled for lovinator.space."),
    )
    for route, notice in expected_notices:
        page: Response = client.get(url=route, params={"feed_url": encoded_feed_url(feed_url)})
        assert page.status_code == 200, f"{route} failed: {page.text}"
        assert notice in page.text
def test_domain_blacklist_isolation_between_domains() -> None:
    """Applying domain blacklist should not overwrite other domains.

    An entry for ``example.com`` is stored first; posting a domain-wide
    blacklist for the feed's own domain must leave that entry untouched.
    """
    reader: Reader = get_reader_dependency()

    # Register the webhook and the feed here, as the sibling tests do, so this
    # test does not rely on state left behind by whichever test ran before it.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert response.status_code == 200, f"Failed to add webhook: {response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name})
    assert response.status_code == 200, f"Failed to add feed: {response.text}"

    # Pre-existing configuration for an unrelated domain.
    reader.set_tag((), "domain_blacklist", {"example.com": {"blacklist_title": "existing"}})  # pyright: ignore[reportArgumentType]

    response = client.post(
        url="/blacklist",
        data={
            "feed_url": feed_url,
            "blacklist_author": "TheLovinator",
            "apply_to_domain": "true",
        },
    )
    assert response.status_code == 200, f"Failed to post blacklist: {response.text}"

    domain_blacklist = reader.get_tag((), "domain_blacklist", {})
    assert isinstance(domain_blacklist, dict)
    assert domain_blacklist["example.com"]["blacklist_title"] == "existing"
    assert domain_blacklist["lovinator.space"]["blacklist_author"] == "TheLovinator"
def test_domain_whitelist_isolation_between_domains() -> None:
    """A domain-wide whitelist post for one domain leaves other domains' entries alone."""
    reader: Reader = get_reader_dependency()

    # Recreate the webhook and the feed so the form post has something to act on.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_response: Response = client.post(
        url="/add",
        data={"feed_url": feed_url, "webhook_dropdown": webhook_name},
    )
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    # Pre-existing configuration for an unrelated domain.
    reader.set_tag((), "domain_whitelist", {"example.com": {"whitelist_title": "existing"}})  # pyright: ignore[reportArgumentType]

    form_data: dict[str, str] = {
        "feed_url": feed_url,
        "whitelist_author": "TheLovinator",
        "apply_to_domain": "true",
    }
    whitelist_response: Response = client.post(url="/whitelist", data=form_data)
    assert whitelist_response.status_code == 200, f"Failed to post whitelist: {whitelist_response.text}"

    # Both the old and the new domain entry must be present afterwards.
    stored = reader.get_tag((), "domain_whitelist", {})
    assert isinstance(stored, dict)
    assert stored["example.com"]["whitelist_title"] == "existing"
    assert stored["lovinator.space"]["whitelist_author"] == "TheLovinator"
def test_domain_blacklist_removed_when_apply_to_domain_and_empty_values() -> None:
    """Posting ``apply_to_domain`` without any blacklist values drops the domain's entry."""
    reader: Reader = get_reader_dependency()

    # Recreate the webhook and the feed so the form post has something to act on.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_response: Response = client.post(
        url="/add",
        data={"feed_url": feed_url, "webhook_dropdown": webhook_name},
    )
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    # Existing entry for the feed's domain that the empty post should remove.
    reader.set_tag((), "domain_blacklist", {"lovinator.space": {"blacklist_title": "existing"}})  # pyright: ignore[reportArgumentType]

    empty_form: dict[str, str] = {"feed_url": feed_url, "apply_to_domain": "true"}
    blacklist_response: Response = client.post(url="/blacklist", data=empty_form)
    assert blacklist_response.status_code == 200, f"Failed to post blacklist: {blacklist_response.text}"

    stored = reader.get_tag((), "domain_blacklist", {})
    assert isinstance(stored, dict)
    assert "lovinator.space" not in stored
def test_domain_whitelist_removed_when_apply_to_domain_and_empty_values() -> None:
    """Submitting empty domain whitelist values should remove existing domain entry."""
    reader: Reader = get_reader_dependency()

    # Register the webhook and the feed here, as the blacklist counterpart
    # does, so this test does not rely on state left behind by earlier tests.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert response.status_code == 200, f"Failed to add webhook: {response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name})
    assert response.status_code == 200, f"Failed to add feed: {response.text}"

    # Existing entry for the feed's domain that the empty post should remove.
    reader.set_tag((), "domain_whitelist", {"lovinator.space": {"whitelist_title": "existing"}})  # pyright: ignore[reportArgumentType]

    response = client.post(
        url="/whitelist",
        data={"feed_url": feed_url, "apply_to_domain": "true"},
    )
    assert response.status_code == 200, f"Failed to post whitelist: {response.text}"

    domain_whitelist = reader.get_tag((), "domain_whitelist", {})
    assert isinstance(domain_whitelist, dict)
    assert "lovinator.space" not in domain_whitelist
def test_apply_to_domain_missing_does_not_update_domain_tags() -> None:
    """Without ``apply_to_domain`` in the form, the domain-wide tags stay as they were."""
    reader: Reader = get_reader_dependency()

    # Recreate the webhook and the feed so the form posts have something to act on.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_response: Response = client.post(
        url="/add",
        data={"feed_url": feed_url, "webhook_dropdown": webhook_name},
    )
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    # Start with both domain-wide tags explicitly empty.
    reader.set_tag((), "domain_blacklist", {})  # pyright: ignore[reportArgumentType]
    reader.set_tag((), "domain_whitelist", {})  # pyright: ignore[reportArgumentType]

    # Feed-level posts only: neither form carries the apply_to_domain field.
    blacklist_form: dict[str, str] = {"feed_url": feed_url, "blacklist_author": "TheLovinator"}
    blacklist_response: Response = client.post(url="/blacklist", data=blacklist_form)
    assert blacklist_response.status_code == 200, f"Failed to post blacklist: {blacklist_response.text}"

    whitelist_form: dict[str, str] = {"feed_url": feed_url, "whitelist_author": "TheLovinator"}
    whitelist_response: Response = client.post(url="/whitelist", data=whitelist_form)
    assert whitelist_response.status_code == 200, f"Failed to post whitelist: {whitelist_response.text}"

    assert reader.get_tag((), "domain_blacklist", {}) == {}
    assert reader.get_tag((), "domain_whitelist", {}) == {}
def test_apply_to_domain_invalid_value_rejected() -> None:
    """A non-boolean ``apply_to_domain`` value is answered with HTTP 422."""
    bad_form: dict[str, str] = {
        "feed_url": feed_url,
        "blacklist_author": "TheLovinator",
        "apply_to_domain": "invalid-bool",
    }

    rejected = client.post(url="/blacklist", data=bad_form)

    assert rejected.status_code == 422, f"Expected 422 for invalid boolean: {rejected.text}"
def test_index_shows_domain_filter_shortcuts() -> None:
    """The index page offers whitelist and blacklist shortcut links per domain."""
    # Recreate the webhook and the feed so the index has a domain to list.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_response: Response = client.post(
        url="/add",
        data={"feed_url": feed_url, "webhook_dropdown": webhook_name},
    )
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    index_response: Response = client.get(url="/")
    assert index_response.status_code == 200, f"Failed to get /: {index_response.text}"

    # Button captions first, then the link targets carrying the encoded feed URL.
    index_html: str = index_response.text
    assert "Domain whitelist" in index_html
    assert "Domain blacklist" in index_html
    assert f"/whitelist?feed_url={encoded_feed_url(feed_url)}" in index_html
    assert f"/blacklist?feed_url={encoded_feed_url(feed_url)}" in index_html
def test_settings_page_shows_screenshot_layout_setting() -> None:
response: Response = client.get(url="/settings")
assert response.status_code == 200, f"/settings failed: {response.text}"

View file

@ -184,3 +184,33 @@ def test_regex_should_be_sent() -> None:
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns"
reader.delete_tag(feed, "regex_whitelist_author")
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
def test_domain_whitelist_should_be_sent() -> None:
    """Domain-wide whitelist should apply to feeds on the same domain.

    The feed itself carries no whitelist tags; the only filter configuration
    is stored under the ``domain_whitelist`` tag on the ``()`` resource, keyed
    by the feed's domain.
    """
    reader: Reader = get_reader()

    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    first_entry: Entry | None = next(iter(entries), None)
    assert first_entry is not None, "Expected at least one entry"

    # Baseline: nothing is configured yet, so nothing is whitelisted.
    assert has_white_tags(reader, feed) is False, "Feed should not have whitelist tags"
    assert should_be_sent(reader, first_entry) is False, "Entry should not be sent"

    reader.set_tag(
        (),
        "domain_whitelist",
        {
            "lovinator.space": {
                "whitelist_author": "TheLovinator",
                # In a raw string a single backslash is enough for the regex
                # word class; a doubled one would require a literal backslash
                # in the title and the pattern could never match.
                "regex_whitelist_title": r"fvnnn\w+",
            },
        },
    )  # pyright: ignore[reportArgumentType]

    assert has_white_tags(reader, feed) is True, "Domain whitelist should count as whitelist tags"
    assert should_be_sent(reader, first_entry) is True, "Entry should be sent by domain whitelist"