From bdbd46ebd45091184f3ade04e8b922e125488978 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Hells=C3=A9n?= Date: Sun, 12 Apr 2026 23:51:05 +0200 Subject: [PATCH] Add domain-wide blacklist and whitelist functionality --- discord_rss_bot/feeds.py | 13 +- discord_rss_bot/filter/blacklist.py | 101 +++++---- discord_rss_bot/filter/utils.py | 58 +++++ discord_rss_bot/filter/whitelist.py | 94 +++++---- discord_rss_bot/git_backup.py | 76 ++++--- discord_rss_bot/main.py | 114 ++++++++-- discord_rss_bot/templates/blacklist.html | 191 ++++++++++------- discord_rss_bot/templates/index.html | 10 +- discord_rss_bot/templates/whitelist.html | 191 ++++++++++------- tests/test_blacklist.py | 30 +++ tests/test_feeds.py | 63 ++++++ tests/test_git_backup.py | 10 +- tests/test_main.py | 258 +++++++++++++++++++++++ tests/test_whitelist.py | 30 +++ 14 files changed, 932 insertions(+), 307 deletions(-) diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py index 90c7af1..b5cf82d 100644 --- a/discord_rss_bot/feeds.py +++ b/discord_rss_bot/feeds.py @@ -711,16 +711,15 @@ def send_to_discord(reader: Reader | None = None, feed: Feed | None = None, *, d use_default_message_on_empty=True, ) - # Check if the entry is blacklisted, and if it is, we will skip it. - if entry_should_be_skipped(effective_reader, entry): + # Whitelist should take precedence when configured. + if has_white_tags(effective_reader, entry.feed): + if not should_be_sent(effective_reader, entry): + logger.info("Entry was not whitelisted: %s", entry.id) + continue + elif entry_should_be_skipped(effective_reader, entry): logger.info("Entry was blacklisted: %s", entry.id) continue - # Check if the feed has a whitelist, and if it does, check if the entry is whitelisted. - if has_white_tags(effective_reader, entry.feed) and not should_be_sent(effective_reader, entry): - logger.info("Entry was not whitelisted: %s", entry.id) - continue - # Use a custom webhook for Hoyolab feeds. 
if is_c3kay_feed(entry.feed.url): entry_link: str | None = entry.link diff --git a/discord_rss_bot/filter/blacklist.py b/discord_rss_bot/filter/blacklist.py index 8260993..ef61fcb 100644 --- a/discord_rss_bot/filter/blacklist.py +++ b/discord_rss_bot/filter/blacklist.py @@ -2,6 +2,7 @@ from __future__ import annotations from typing import TYPE_CHECKING +from discord_rss_bot.filter.utils import get_domain_filter_tags from discord_rss_bot.filter.utils import is_regex_match from discord_rss_bot.filter.utils import is_word_in_text @@ -11,6 +12,37 @@ if TYPE_CHECKING: from reader import Reader +_MATCH_FIELDS: tuple[str, ...] = ("title", "summary", "content", "author") + + +def _get_effective_blacklist_values(reader: Reader, feed: Feed) -> tuple[dict[str, str], dict[str, str]]: + """Return merged feed-level and domain-level blacklist values.""" + local_values: dict[str, str] = { + field: str(reader.get_tag(feed, f"blacklist_{field}", "")).strip() for field in _MATCH_FIELDS + } + local_regex_values: dict[str, str] = { + field: str(reader.get_tag(feed, f"regex_blacklist_{field}", "")).strip() for field in _MATCH_FIELDS + } + + domain_values_raw: dict[str, str] = get_domain_filter_tags(reader, feed, "domain_blacklist") + domain_values: dict[str, str] = { + field: str(domain_values_raw.get(f"blacklist_{field}", "")).strip() for field in _MATCH_FIELDS + } + domain_regex_values: dict[str, str] = { + field: str(domain_values_raw.get(f"regex_blacklist_{field}", "")).strip() for field in _MATCH_FIELDS + } + + merged_values: dict[str, str] = { + field: ",".join(value for value in (local_values[field], domain_values[field]) if value) + for field in _MATCH_FIELDS + } + merged_regex_values: dict[str, str] = { + field: "\n".join(value for value in (local_regex_values[field], domain_regex_values[field]) if value) + for field in _MATCH_FIELDS + } + return merged_values, merged_regex_values + + def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool: """Return True if the feed 
has blacklist tags. @@ -31,26 +63,8 @@ def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool: Returns: bool: If the feed has any of the tags. """ - blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip() - blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip() - blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip() - blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip() - - regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip() - regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip() - regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip() - regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip() - - return bool( - blacklist_title - or blacklist_author - or blacklist_content - or blacklist_summary - or regex_blacklist_author - or regex_blacklist_content - or regex_blacklist_summary - or regex_blacklist_title, - ) + merged_values, merged_regex_values = _get_effective_blacklist_values(reader, feed) + return any(merged_values.values()) or any(merged_regex_values.values()) def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911 @@ -63,58 +77,55 @@ def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0 Returns: bool: If the entry is in the blacklist. 
""" - feed = entry.feed - - blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip() - blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip() - blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip() - blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip() - - regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip() - regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip() - regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip() - regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip() + merged_values, merged_regex_values = _get_effective_blacklist_values(reader, entry.feed) # TODO(TheLovinator): Also add support for entry_text and more. # Check regular blacklist - if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title): + if entry.title and merged_values["title"] and is_word_in_text(merged_values["title"], entry.title): return True - if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary): + if entry.summary and merged_values["summary"] and is_word_in_text(merged_values["summary"], entry.summary): return True if ( entry.content and entry.content[0].value - and blacklist_content - and is_word_in_text(blacklist_content, entry.content[0].value) + and merged_values["content"] + and is_word_in_text(merged_values["content"], entry.content[0].value) ): return True - if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author): + if entry.author and merged_values["author"] and is_word_in_text(merged_values["author"], entry.author): return True if ( entry.content and entry.content[0].value - and blacklist_content - and is_word_in_text(blacklist_content, entry.content[0].value) + and merged_values["content"] + and 
is_word_in_text(merged_values["content"], entry.content[0].value) ): return True # Check regex blacklist - if entry.title and regex_blacklist_title and is_regex_match(regex_blacklist_title, entry.title): + if entry.title and merged_regex_values["title"] and is_regex_match(merged_regex_values["title"], entry.title): return True - if entry.summary and regex_blacklist_summary and is_regex_match(regex_blacklist_summary, entry.summary): + if ( + entry.summary + and merged_regex_values["summary"] + and is_regex_match( + merged_regex_values["summary"], + entry.summary, + ) + ): return True if ( entry.content and entry.content[0].value - and regex_blacklist_content - and is_regex_match(regex_blacklist_content, entry.content[0].value) + and merged_regex_values["content"] + and is_regex_match(merged_regex_values["content"], entry.content[0].value) ): return True - if entry.author and regex_blacklist_author and is_regex_match(regex_blacklist_author, entry.author): + if entry.author and merged_regex_values["author"] and is_regex_match(merged_regex_values["author"], entry.author): return True return bool( entry.content and entry.content[0].value - and regex_blacklist_content - and is_regex_match(regex_blacklist_content, entry.content[0].value), + and merged_regex_values["content"] + and is_regex_match(merged_regex_values["content"], entry.content[0].value), ) diff --git a/discord_rss_bot/filter/utils.py b/discord_rss_bot/filter/utils.py index ff93e59..d25e398 100644 --- a/discord_rss_bot/filter/utils.py +++ b/discord_rss_bot/filter/utils.py @@ -2,6 +2,14 @@ from __future__ import annotations import logging import re +from typing import TYPE_CHECKING +from urllib.parse import urlparse + +import tldextract + +if TYPE_CHECKING: + from reader import Feed + from reader import Reader logger: logging.Logger = logging.getLogger(__name__) @@ -70,3 +78,53 @@ def is_regex_match(regex_string: str, text: str) -> bool: logger.info("No regex patterns matched.") return False + + +def 
get_domain_key(url: str) -> str: + """Return a normalized domain key used for domain-wide filters. + + Args: + url: The URL to extract the domain from. + + Returns: + str: A normalized domain key (e.g. ``example.com``). + """ + if not url: + return "" + + parsed_url = urlparse(url) + host: str = parsed_url.netloc.lower().strip() + host = host.removeprefix("www.") + + if not host: + return "" + + ext = tldextract.extract(host) + top_domain: str = ext.top_domain_under_public_suffix + return top_domain or host + + +def get_domain_filter_tags(reader: Reader, feed: Feed, tag_name: str) -> dict[str, str]: + """Return domain-wide filter tags for a feed. + + Args: + reader: Reader instance. + feed: Feed instance. + tag_name: Global tag name that stores domain filters. + + Returns: + dict[str, str]: Domain filter values for the feed's domain. + """ + domain_key: str = get_domain_key(str(feed.url)) + if not domain_key: + return {} + + domain_filters: object = reader.get_tag((), tag_name, {}) + if not isinstance(domain_filters, dict): + return {} + + values: object = domain_filters.get(domain_key, {}) + if not isinstance(values, dict): + return {} + + return {str(key): str(value) for key, value in values.items() if isinstance(key, str)} diff --git a/discord_rss_bot/filter/whitelist.py b/discord_rss_bot/filter/whitelist.py index bb5303d..1d8de85 100644 --- a/discord_rss_bot/filter/whitelist.py +++ b/discord_rss_bot/filter/whitelist.py @@ -2,6 +2,7 @@ from __future__ import annotations from typing import TYPE_CHECKING +from discord_rss_bot.filter.utils import get_domain_filter_tags from discord_rss_bot.filter.utils import is_regex_match from discord_rss_bot.filter.utils import is_word_in_text @@ -11,6 +12,37 @@ if TYPE_CHECKING: from reader import Reader +_MATCH_FIELDS: tuple[str, ...] 
= ("title", "summary", "content", "author") + + +def _get_effective_whitelist_values(reader: Reader, feed: Feed) -> tuple[dict[str, str], dict[str, str]]: + """Return merged feed-level and domain-level whitelist values.""" + local_values: dict[str, str] = { + field: str(reader.get_tag(feed, f"whitelist_{field}", "")).strip() for field in _MATCH_FIELDS + } + local_regex_values: dict[str, str] = { + field: str(reader.get_tag(feed, f"regex_whitelist_{field}", "")).strip() for field in _MATCH_FIELDS + } + + domain_values_raw: dict[str, str] = get_domain_filter_tags(reader, feed, "domain_whitelist") + domain_values: dict[str, str] = { + field: str(domain_values_raw.get(f"whitelist_{field}", "")).strip() for field in _MATCH_FIELDS + } + domain_regex_values: dict[str, str] = { + field: str(domain_values_raw.get(f"regex_whitelist_{field}", "")).strip() for field in _MATCH_FIELDS + } + + merged_values: dict[str, str] = { + field: ",".join(value for value in (local_values[field], domain_values[field]) if value) + for field in _MATCH_FIELDS + } + merged_regex_values: dict[str, str] = { + field: "\n".join(value for value in (local_regex_values[field], domain_regex_values[field]) if value) + for field in _MATCH_FIELDS + } + return merged_values, merged_regex_values + + def has_white_tags(reader: Reader, feed: Feed) -> bool: """Return True if the feed has whitelist tags. @@ -31,26 +63,8 @@ def has_white_tags(reader: Reader, feed: Feed) -> bool: Returns: bool: If the feed has any of the tags. 
""" - whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip() - whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip() - whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip() - whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip() - - regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip() - regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip() - regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip() - regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip() - - return bool( - whitelist_title - or whitelist_author - or whitelist_content - or whitelist_summary - or regex_whitelist_author - or regex_whitelist_content - or regex_whitelist_summary - or regex_whitelist_title, - ) + merged_values, merged_regex_values = _get_effective_whitelist_values(reader, feed) + return any(merged_values.values()) or any(merged_regex_values.values()) def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911 @@ -63,44 +77,40 @@ def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911 Returns: bool: If the entry is in the whitelist. 
""" - feed: Feed = entry.feed - # Regular whitelist tags - whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip() - whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip() - whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip() - whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip() - - # Regex whitelist tags - regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip() - regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip() - regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip() - regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip() + merged_values, merged_regex_values = _get_effective_whitelist_values(reader, entry.feed) # Check regular whitelist - if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title): + if entry.title and merged_values["title"] and is_word_in_text(merged_values["title"], entry.title): return True - if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary): + if entry.summary and merged_values["summary"] and is_word_in_text(merged_values["summary"], entry.summary): return True - if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author): + if entry.author and merged_values["author"] and is_word_in_text(merged_values["author"], entry.author): return True if ( entry.content and entry.content[0].value - and whitelist_content - and is_word_in_text(whitelist_content, entry.content[0].value) + and merged_values["content"] + and is_word_in_text(merged_values["content"], entry.content[0].value) ): return True # Check regex whitelist - if entry.title and regex_whitelist_title and is_regex_match(regex_whitelist_title, entry.title): + if entry.title and merged_regex_values["title"] and 
is_regex_match(merged_regex_values["title"], entry.title): return True - if entry.summary and regex_whitelist_summary and is_regex_match(regex_whitelist_summary, entry.summary): + if ( + entry.summary + and merged_regex_values["summary"] + and is_regex_match( + merged_regex_values["summary"], + entry.summary, + ) + ): return True - if entry.author and regex_whitelist_author and is_regex_match(regex_whitelist_author, entry.author): + if entry.author and merged_regex_values["author"] and is_regex_match(merged_regex_values["author"], entry.author): return True return bool( entry.content and entry.content[0].value - and regex_whitelist_content - and is_regex_match(regex_whitelist_content, entry.content[0].value), + and merged_regex_values["content"] + and is_regex_match(merged_regex_values["content"], entry.content[0].value), ) diff --git a/discord_rss_bot/git_backup.py b/discord_rss_bot/git_backup.py index febc34c..9b1c585 100644 --- a/discord_rss_bot/git_backup.py +++ b/discord_rss_bot/git_backup.py @@ -28,7 +28,7 @@ import shutil import subprocess # noqa: S404 from pathlib import Path from typing import TYPE_CHECKING -from typing import Any +from typing import cast if TYPE_CHECKING: from reader import Reader @@ -37,11 +37,8 @@ logger: logging.Logger = logging.getLogger(__name__) GIT_EXECUTABLE: str = shutil.which("git") or "git" -type TAG_VALUE = ( - dict[str, str | int | float | bool | dict[str, Any] | list[Any] | None] - | list[str | int | float | bool | dict[str, Any] | list[Any] | None] - | None -) +type JsonScalar = str | int | float | bool | None +type JsonLike = JsonScalar | dict[str, JsonLike] | list[JsonLike] # Tags that are exported per-feed (empty values are omitted). _FEED_TAGS: tuple[str, ...] = ( @@ -157,47 +154,68 @@ def setup_backup_repo(backup_path: Path) -> bool: return True -def export_state(reader: Reader, backup_path: Path) -> None: - """Serialise the current bot state to ``state.json`` inside *backup_path*. 
+def _build_feed_state(reader: Reader) -> list[JsonLike]: + """Collect feed and per-feed tag state. - Args: - reader: The :class:`reader.Reader` instance to read state from. - backup_path: Destination directory for the exported ``state.json``. + Returns: + A list of dictionaries containing feed URLs and their associated tag values. """ - feeds_state: list[dict] = [] + feeds_state: list[JsonLike] = [] for feed in reader.get_feeds(): - feed_data: dict = {"url": feed.url} + feed_data: dict[str, JsonLike] = {"url": feed.url} for tag in _FEED_TAGS: try: - value: TAG_VALUE = reader.get_tag(feed, tag, None) + value: JsonLike | None = cast("JsonLike | None", reader.get_tag(feed, tag, None)) if value is not None and value != "": # noqa: PLC1901 feed_data[tag] = value except Exception: logger.exception("Failed to read tag '%s' for feed '%s' during state export", tag, feed.url) feeds_state.append(feed_data) + return feeds_state - webhooks: list[str | int | float | bool | dict[str, Any] | list[Any] | None] = list( - reader.get_tag((), "webhooks", []), - ) - # Export global update interval if set - global_update_interval: dict[str, Any] | None = None - global_update_config = reader.get_tag((), ".reader.update", None) - if isinstance(global_update_config, dict): - global_update_interval = global_update_config +def _get_global_dict_tag(reader: Reader, tag_name: str) -> dict[str, JsonLike] | None: + """Return a global tag value if it is a dictionary.""" + tag_value: JsonLike | None = cast("JsonLike | None", reader.get_tag((), tag_name, None)) + return tag_value if isinstance(tag_value, dict) else None - global_screenshot_layout: str | None = None - screenshot_layout = reader.get_tag((), "screenshot_layout", None) - if isinstance(screenshot_layout, str): - clean_layout = screenshot_layout.strip().lower() - if clean_layout in {"desktop", "mobile"}: - global_screenshot_layout = clean_layout - state: dict = {"feeds": feeds_state, "webhooks": webhooks} +def 
_get_global_screenshot_layout(reader: Reader) -> str | None: + """Return normalized global screenshot layout if valid.""" + screenshot_layout: JsonLike | None = cast("JsonLike | None", reader.get_tag((), "screenshot_layout", None)) + if not isinstance(screenshot_layout, str): + return None + + clean_layout: str = screenshot_layout.strip().lower() + return clean_layout if clean_layout in {"desktop", "mobile"} else None + + +def export_state(reader: Reader, backup_path: Path) -> None: + """Serialize the current bot state to ``state.json`` inside *backup_path*. + + Args: + reader: The :class:`reader.Reader` instance to read state from. + backup_path: Destination directory for the exported ``state.json``. + """ + feeds_state: list[JsonLike] = _build_feed_state(reader) + + webhooks_raw: JsonLike | None = cast("JsonLike | None", reader.get_tag((), "webhooks", [])) + webhooks: list[JsonLike] = webhooks_raw if isinstance(webhooks_raw, list) else [] + + global_update_interval: dict[str, JsonLike] | None = _get_global_dict_tag(reader, ".reader.update") + global_screenshot_layout: str | None = _get_global_screenshot_layout(reader) + domain_blacklist: dict[str, JsonLike] | None = _get_global_dict_tag(reader, "domain_blacklist") + domain_whitelist: dict[str, JsonLike] | None = _get_global_dict_tag(reader, "domain_whitelist") + + state: dict[str, JsonLike] = {"feeds": feeds_state, "webhooks": webhooks} if global_update_interval is not None: state["global_update_interval"] = global_update_interval if global_screenshot_layout is not None: state["global_screenshot_layout"] = global_screenshot_layout + if domain_blacklist is not None: + state["domain_blacklist"] = domain_blacklist + if domain_whitelist is not None: + state["domain_whitelist"] = domain_whitelist state_file: Path = backup_path / "state.json" state_file.write_text(json.dumps(state, indent=2, default=str), encoding="utf-8") diff --git a/discord_rss_bot/main.py b/discord_rss_bot/main.py index 961c70e..8b02323 100644 --- 
a/discord_rss_bot/main.py +++ b/discord_rss_bot/main.py @@ -54,6 +54,7 @@ from discord_rss_bot.feeds import get_feed_delivery_mode from discord_rss_bot.feeds import get_screenshot_layout from discord_rss_bot.feeds import send_entry_to_discord from discord_rss_bot.feeds import send_to_discord +from discord_rss_bot.filter.utils import get_domain_key from discord_rss_bot.git_backup import commit_state_change from discord_rss_bot.git_backup import get_backup_path from discord_rss_bot.is_url_valid import is_url_valid @@ -399,6 +400,7 @@ async def post_unpause_feed( @app.post("/whitelist") async def post_set_whitelist( reader: Annotated[Reader, Depends(get_reader_dependency)], + *, whitelist_title: Annotated[str, Form()] = "", whitelist_summary: Annotated[str, Form()] = "", whitelist_content: Annotated[str, Form()] = "", @@ -407,6 +409,7 @@ async def post_set_whitelist( regex_whitelist_summary: Annotated[str, Form()] = "", regex_whitelist_content: Annotated[str, Form()] = "", regex_whitelist_author: Annotated[str, Form()] = "", + apply_to_domain: Annotated[bool, Form()] = False, feed_url: Annotated[str, Form()] = "", ) -> RedirectResponse: """Set what the whitelist should be sent, if you have this set only words in the whitelist will be sent. @@ -420,6 +423,7 @@ async def post_set_whitelist( regex_whitelist_summary: Whitelisted regex for when checking the summary. regex_whitelist_content: Whitelisted regex for when checking the content. regex_whitelist_author: Whitelisted regex for when checking the author. + apply_to_domain: Also store these values as domain-wide whitelist rules. feed_url: The feed we should set the whitelist for. reader: The Reader instance. @@ -427,16 +431,43 @@ async def post_set_whitelist( RedirectResponse: Redirect to the feed page. 
""" clean_feed_url: str = feed_url.strip() if feed_url else "" - reader.set_tag(clean_feed_url, "whitelist_title", whitelist_title) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "whitelist_summary", whitelist_summary) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "whitelist_content", whitelist_content) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "whitelist_author", whitelist_author) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "regex_whitelist_title", regex_whitelist_title) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "regex_whitelist_summary", regex_whitelist_summary) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "regex_whitelist_content", regex_whitelist_content) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "regex_whitelist_author", regex_whitelist_author) # pyright: ignore[reportArgumentType][call-overload] + whitelist_values: dict[str, str] = { + "whitelist_title": whitelist_title.strip(), + "whitelist_summary": whitelist_summary.strip(), + "whitelist_content": whitelist_content.strip(), + "whitelist_author": whitelist_author.strip(), + "regex_whitelist_title": regex_whitelist_title.strip(), + "regex_whitelist_summary": regex_whitelist_summary.strip(), + "regex_whitelist_content": regex_whitelist_content.strip(), + "regex_whitelist_author": regex_whitelist_author.strip(), + } - commit_state_change(reader, f"Update whitelist for {clean_feed_url}") + for tag, value in whitelist_values.items(): + reader.set_tag(clean_feed_url, tag, value) # pyright: ignore[reportArgumentType][call-overload] + + message: str = f"Update whitelist for {clean_feed_url}" + if apply_to_domain: + domain_key: str = get_domain_key(clean_feed_url) + if domain_key: + domain_whitelists_raw = reader.get_tag((), 
"domain_whitelist", {}) + domain_whitelists: dict[str, dict[str, str]] = {} + if isinstance(domain_whitelists_raw, dict): + for existing_domain, existing_values in domain_whitelists_raw.items(): + if isinstance(existing_domain, str) and isinstance(existing_values, dict): + domain_whitelists[existing_domain] = { + str(key): str(value) for key, value in existing_values.items() if isinstance(key, str) + } + + domain_values: dict[str, str] = {k: v for k, v in whitelist_values.items() if v} + if domain_values: + domain_whitelists[domain_key] = domain_values + else: + domain_whitelists.pop(domain_key, None) + + reader.set_tag((), "domain_whitelist", domain_whitelists) # pyright: ignore[reportArgumentType] + message = f"Update whitelist for {clean_feed_url} and domain {domain_key}" + + commit_state_change(reader, message) return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303) @@ -468,6 +499,11 @@ async def get_whitelist( regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")) regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")) regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")) + domain_key: str = get_domain_key(feed.url) + domain_whitelist_raw = reader.get_tag((), "domain_whitelist", {}) + domain_whitelist_enabled: bool = bool( + isinstance(domain_whitelist_raw, dict) and domain_key and domain_key in domain_whitelist_raw, + ) context = { "request": request, @@ -480,6 +516,9 @@ async def get_whitelist( "regex_whitelist_summary": regex_whitelist_summary, "regex_whitelist_content": regex_whitelist_content, "regex_whitelist_author": regex_whitelist_author, + "domain_key": domain_key, + "domain_name": extract_domain(feed.url), + "domain_whitelist_enabled": domain_whitelist_enabled, } return templates.TemplateResponse(request=request, name="whitelist.html", context=context) @@ -487,6 +526,7 @@ async def get_whitelist( 
@app.post("/blacklist") async def post_set_blacklist( reader: Annotated[Reader, Depends(get_reader_dependency)], + *, blacklist_title: Annotated[str, Form()] = "", blacklist_summary: Annotated[str, Form()] = "", blacklist_content: Annotated[str, Form()] = "", @@ -495,6 +535,7 @@ async def post_set_blacklist( regex_blacklist_summary: Annotated[str, Form()] = "", regex_blacklist_content: Annotated[str, Form()] = "", regex_blacklist_author: Annotated[str, Form()] = "", + apply_to_domain: Annotated[bool, Form()] = False, feed_url: Annotated[str, Form()] = "", ) -> RedirectResponse: """Set the blacklist. @@ -511,6 +552,7 @@ async def post_set_blacklist( regex_blacklist_summary: Blacklisted regex for when checking the summary. regex_blacklist_content: Blacklisted regex for when checking the content. regex_blacklist_author: Blacklisted regex for when checking the author. + apply_to_domain: Also store these values as domain-wide blacklist rules. feed_url: What feed we should set the blacklist for. reader: The Reader instance. @@ -518,15 +560,43 @@ async def post_set_blacklist( RedirectResponse: Redirect to the feed page. 
""" clean_feed_url: str = feed_url.strip() if feed_url else "" - reader.set_tag(clean_feed_url, "blacklist_title", blacklist_title) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "blacklist_summary", blacklist_summary) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "blacklist_content", blacklist_content) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "blacklist_author", blacklist_author) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "regex_blacklist_title", regex_blacklist_title) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "regex_blacklist_summary", regex_blacklist_summary) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "regex_blacklist_content", regex_blacklist_content) # pyright: ignore[reportArgumentType][call-overload] - reader.set_tag(clean_feed_url, "regex_blacklist_author", regex_blacklist_author) # pyright: ignore[reportArgumentType][call-overload] - commit_state_change(reader, f"Update blacklist for {clean_feed_url}") + blacklist_values: dict[str, str] = { + "blacklist_title": blacklist_title.strip(), + "blacklist_summary": blacklist_summary.strip(), + "blacklist_content": blacklist_content.strip(), + "blacklist_author": blacklist_author.strip(), + "regex_blacklist_title": regex_blacklist_title.strip(), + "regex_blacklist_summary": regex_blacklist_summary.strip(), + "regex_blacklist_content": regex_blacklist_content.strip(), + "regex_blacklist_author": regex_blacklist_author.strip(), + } + + for tag, value in blacklist_values.items(): + reader.set_tag(clean_feed_url, tag, value) # pyright: ignore[reportArgumentType][call-overload] + + message: str = f"Update blacklist for {clean_feed_url}" + if apply_to_domain: + domain_key: str = get_domain_key(clean_feed_url) + if domain_key: + domain_blacklists_raw = reader.get_tag((), 
"domain_blacklist", {}) + domain_blacklists: dict[str, dict[str, str]] = {} + if isinstance(domain_blacklists_raw, dict): + for existing_domain, existing_values in domain_blacklists_raw.items(): + if isinstance(existing_domain, str) and isinstance(existing_values, dict): + domain_blacklists[existing_domain] = { + str(key): str(value) for key, value in existing_values.items() if isinstance(key, str) + } + + domain_values: dict[str, str] = {k: v for k, v in blacklist_values.items() if v} + if domain_values: + domain_blacklists[domain_key] = domain_values + else: + domain_blacklists.pop(domain_key, None) + + reader.set_tag((), "domain_blacklist", domain_blacklists) # pyright: ignore[reportArgumentType] + message = f"Update blacklist for {clean_feed_url} and domain {domain_key}" + + commit_state_change(reader, message) return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303) @@ -556,6 +626,11 @@ async def get_blacklist( regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")) regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")) regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")) + domain_key: str = get_domain_key(feed.url) + domain_blacklist_raw = reader.get_tag((), "domain_blacklist", {}) + domain_blacklist_enabled: bool = bool( + isinstance(domain_blacklist_raw, dict) and domain_key and domain_key in domain_blacklist_raw, + ) context = { "request": request, @@ -568,6 +643,9 @@ async def get_blacklist( "regex_blacklist_summary": regex_blacklist_summary, "regex_blacklist_content": regex_blacklist_content, "regex_blacklist_author": regex_blacklist_author, + "domain_key": domain_key, + "domain_name": extract_domain(feed.url), + "domain_blacklist_enabled": domain_blacklist_enabled, } return templates.TemplateResponse(request=request, name="blacklist.html", context=context) diff --git a/discord_rss_bot/templates/blacklist.html 
b/discord_rss_bot/templates/blacklist.html index ec16bce..6904085 100644 --- a/discord_rss_bot/templates/blacklist.html +++ b/discord_rss_bot/templates/blacklist.html @@ -1,98 +1,127 @@ {% extends "base.html" %} {% block title %} -| Blacklist + | Blacklist {% endblock title %} {% block content %} -
-
- -
-
-
-
    -
  • - Comma separated list of words to blacklist. If a word is found in the - corresponding blacklists, the feed will not be sent. -
  • -
  • Whitelist always takes precedence over blacklist. Leave empty to disable.
  • -
  • Words are case-insensitive. No spaces should be used before or after the comma.
  • -
  • - Correct: - - primogem,events,gameplay preview,special program - -
  • -
  • - Wrong: - - primogem, events, gameplay preview, special program - -
  • -
-
- - - - - - - - - -
+
+ + +
+
  • - Regular expression patterns for advanced filtering. Each pattern should be on a new - line. + Comma separated list of words to blacklist. If a word is found in the + corresponding blacklists, the entry will not be sent.
  • -
  • Patterns are case-insensitive.
  • +
  • Whitelist always takes precedence over blacklist. Leave empty to disable.
  • +
  • Words are case-insensitive. No spaces should be used before or after the comma.
  • - Examples: + Correct: -
    -^New Release:.*
    -\b(update|version|patch)\s+\d+\.\d+
    -.*\[(important|notice)\].*
    -
    + primogem,events,gameplay preview,special program +
    +
  • +
  • + Wrong: + + primogem, events, gameplay preview, special program
- - - - - - - - - - - + {% if domain_blacklist_enabled %} + + {% endif %} + + + + + + + + +
+
+
    +
  • + Regular expression patterns for advanced filtering. Each pattern should be on a new + line. +
  • +
  • Patterns are case-insensitive.
  • +
  • + Examples: + +
    +^New Release:.*
    +\b(update|version|patch)\s+\d+\.\d+
    +.*\[(important|notice)\].*
    +
    +
    +
  • +
+
+ + + + + + + + +
+
+ + +
-
- - - -
- -
- -
+ + + +
+ +
+ +
{% endblock content %} diff --git a/discord_rss_bot/templates/index.html b/discord_rss_bot/templates/index.html index 6c12656..0229e3e 100644 --- a/discord_rss_bot/templates/index.html +++ b/discord_rss_bot/templates/index.html @@ -59,8 +59,16 @@ {% for domain, domain_feeds in domains.items() %}
-
+

{{ domain }} ({{ domain_feeds|length }})

+ {% if domain_feeds %} + + {% endif %}
    diff --git a/discord_rss_bot/templates/whitelist.html b/discord_rss_bot/templates/whitelist.html index 61755e2..65a1f90 100644 --- a/discord_rss_bot/templates/whitelist.html +++ b/discord_rss_bot/templates/whitelist.html @@ -1,98 +1,127 @@ {% extends "base.html" %} {% block title %} -| Whitelist + | Whitelist {% endblock title %} {% block content %} -
    -
    - -
    -
    -
    -
      -
    • - Comma separated list of words to whitelist. Only send message to - Discord if one of these words are present in the corresponding fields. -
    • -
    • Whitelist always takes precedence over blacklist. Leave empty to disable.
    • -
    • Words are case-insensitive. No spaces should be used before or after the comma.
    • -
    • - Correct: - - primogem,events,gameplay preview,special program - -
    • -
    • - Wrong: - - primogem, events, gameplay preview, special program - -
    • -
    -
    - - - - - - - - - -
    +
    + + +
    +
    • - Regular expression patterns for advanced filtering. Each pattern should be on a new - line. + Comma separated list of words to whitelist. Only send a message to + Discord if one of these words is present in the corresponding fields.
    • -
    • Patterns are case-insensitive.
    • +
    • Whitelist always takes precedence over blacklist. Leave empty to disable.
    • +
    • Words are case-insensitive. No spaces should be used before or after the comma.
    • - Examples: + Correct: -
      -^New Release:.*
      -\b(update|version|patch)\s+\d+\.\d+
      -.*\[(important|notice)\].*
      -
      + primogem,events,gameplay preview,special program +
      +
    • +
    • + Wrong: + + primogem, events, gameplay preview, special program
    - - - - - - - - - - - + {% if domain_whitelist_enabled %} + + {% endif %} + + + + + + + + +
    +
    +
      +
    • + Regular expression patterns for advanced filtering. Each pattern should be on a new + line. +
    • +
    • Patterns are case-insensitive.
    • +
    • + Examples: + +
      +^New Release:.*
      +\b(update|version|patch)\s+\d+\.\d+
      +.*\[(important|notice)\].*
      +
      +
      +
    • +
    +
    + + + + + + + + +
    +
    + + +
    -
    - - - -
    - -
    - -
    + + + +
    + +
    + +
    {% endblock content %} diff --git a/tests/test_blacklist.py b/tests/test_blacklist.py index 0c756ad..e64ac0e 100644 --- a/tests/test_blacklist.py +++ b/tests/test_blacklist.py @@ -203,3 +203,33 @@ def test_regex_should_be_skipped() -> None: ) reader.delete_tag(feed, "regex_blacklist_author") assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}" + + +def test_domain_blacklist_should_be_skipped() -> None: + """Domain-wide blacklist should apply to feeds on the same domain.""" + reader: Reader = get_reader() + + reader.add_feed(feed_url) + feed: Feed = reader.get_feed(feed_url) + reader.update_feeds() + + entries: Iterable[Entry] = reader.get_entries(feed=feed) + first_entry: Entry | None = next(iter(entries), None) + assert first_entry is not None, "Expected at least one entry" + + assert feed_has_blacklist_tags(reader, feed) is False, "Feed should not have blacklist tags" + assert entry_should_be_skipped(reader, first_entry) is False, "Entry should not be skipped" + + reader.set_tag( + (), + "domain_blacklist", + { + "lovinator.space": { + "blacklist_author": "TheLovinator", + "regex_blacklist_title": r"fvnnn\\w+", + }, + }, + ) # pyright: ignore[reportArgumentType] + + assert feed_has_blacklist_tags(reader, feed) is True, "Domain blacklist should count as blacklist tags" + assert entry_should_be_skipped(reader, first_entry) is True, "Entry should be skipped by domain blacklist" diff --git a/tests/test_feeds.py b/tests/test_feeds.py index 93c5ccc..b38aadc 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -3,6 +3,8 @@ from __future__ import annotations import asyncio import os import tempfile +from datetime import UTC +from datetime import datetime from pathlib import Path from typing import LiteralString from unittest.mock import MagicMock @@ -190,6 +192,67 @@ def test_get_entry_delivery_mode_falls_back_to_legacy_embed_flag() -> None: assert result == "text" 
+@patch("discord_rss_bot.feeds.execute_webhook") +@patch("discord_rss_bot.feeds.create_text_webhook") +@patch("discord_rss_bot.feeds.should_be_sent", return_value=True) +@patch("discord_rss_bot.feeds.has_white_tags", return_value=True) +@patch("discord_rss_bot.feeds.entry_should_be_skipped", return_value=True) +def test_send_to_discord_whitelist_precedence_over_blacklist( + mock_entry_should_be_skipped: MagicMock, + mock_has_white_tags: MagicMock, + mock_should_be_sent: MagicMock, + mock_create_text_webhook: MagicMock, + mock_execute_webhook: MagicMock, +) -> None: + """When whitelist is configured and matches, entry should still be sent even if blacklist matches.""" + reader = MagicMock() + feed = MagicMock() + feed.url = "https://example.com/feed.xml" + + entry = MagicMock() + entry.id = "entry-1" + entry.feed = feed + entry.feed_url = feed.url + entry.added = datetime.now(tz=UTC) + + reader.get_entries.return_value = [entry] + + def get_tag_side_effect( + resource: str | Feed, + key: str, + default: str | None = None, + ) -> str | None: + """Side effect function for reader.get_tag to return specific values based on the key. + + Args: + resource: The resource for which the tag is being requested (ignored in this case). + key: The tag key being requested. + default: The default value to return if the key is not found. 
+ + Returns: + - "https://discord.test/webhook" for "webhook" key + - "text" for "delivery_mode" key + - default value for any other key + """ + if key == "webhook": + return "https://discord.test/webhook" + if key == "delivery_mode": + return "text" + return default + + reader.get_tag.side_effect = get_tag_side_effect + + webhook = MagicMock() + mock_create_text_webhook.return_value = webhook + + send_to_discord(reader=reader, feed=feed, do_once=True) + + mock_has_white_tags.assert_called_once_with(reader, feed) + mock_should_be_sent.assert_called_once_with(reader, entry) + mock_entry_should_be_skipped.assert_not_called() + mock_execute_webhook.assert_called_once_with(webhook, entry, reader=reader) + + @patch("discord_rss_bot.feeds.execute_webhook") @patch("discord_rss_bot.feeds.create_text_webhook") @patch("discord_rss_bot.feeds.create_hoyolab_webhook") diff --git a/tests/test_git_backup.py b/tests/test_git_backup.py index bfdf4a2..e9dfb9c 100644 --- a/tests/test_git_backup.py +++ b/tests/test_git_backup.py @@ -173,9 +173,11 @@ def test_export_state_creates_state_json(tmp_path: Path) -> None: tag: str | None = None, default: str | None = None, ) -> list[Any] | str | None: - if feed_or_key == () and tag is None: - # Called for global webhooks list - return [] + if feed_or_key == () and tag == "domain_blacklist": + return {"example.com": {"blacklist_title": "spoiler"}} + + if feed_or_key == () and tag == "domain_whitelist": + return {"example.com": {"whitelist_title": "release"}} if tag == "webhook": return "https://discord.com/api/webhooks/123/abc" @@ -194,6 +196,8 @@ def test_export_state_creates_state_json(tmp_path: Path) -> None: data: dict[str, Any] = json.loads(state_file.read_text(encoding="utf-8")) assert "feeds" in data assert "webhooks" in data + assert data["domain_blacklist"]["example.com"]["blacklist_title"] == "spoiler" + assert data["domain_whitelist"]["example.com"]["whitelist_title"] == "release" assert data["feeds"][0]["url"] == 
"https://example.com/feed.rss" assert data["feeds"][0]["webhook"] == "https://discord.com/api/webhooks/123/abc" diff --git a/tests/test_main.py b/tests/test_main.py index d680a26..408022d 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -221,6 +221,264 @@ def test_get() -> None: assert response.status_code == 200, f"/whitelist failed: {response.text}" +def test_post_blacklist_apply_to_domain_updates_global_domain_blacklist() -> None: + """Posting blacklist with apply_to_domain should save domain-wide blacklist values.""" + reader: Reader = get_reader_dependency() + + # Ensure webhook exists and feed can be created. + client.post(url="/delete_webhook", data={"webhook_url": webhook_url}) + response: Response = client.post( + url="/add_webhook", + data={"webhook_name": webhook_name, "webhook_url": webhook_url}, + ) + assert response.status_code == 200, f"Failed to add webhook: {response.text}" + + client.post(url="/remove", data={"feed_url": feed_url}) + response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name}) + assert response.status_code == 200, f"Failed to add feed: {response.text}" + + response = client.post( + url="/blacklist", + data={ + "feed_url": feed_url, + "blacklist_author": "TheLovinator", + "apply_to_domain": "true", + }, + ) + assert response.status_code == 200, f"Failed to post blacklist: {response.text}" + + domain_blacklist = reader.get_tag((), "domain_blacklist", {}) + assert isinstance(domain_blacklist, dict), "domain_blacklist should be a dict" + assert "lovinator.space" in domain_blacklist, "Expected domain key in domain_blacklist" + assert domain_blacklist["lovinator.space"]["blacklist_author"] == "TheLovinator" + + +def test_post_whitelist_apply_to_domain_updates_global_domain_whitelist() -> None: + """Posting whitelist with apply_to_domain should save domain-wide whitelist values.""" + reader: Reader = get_reader_dependency() + + client.post(url="/delete_webhook", data={"webhook_url": 
webhook_url}) + response: Response = client.post( + url="/add_webhook", + data={"webhook_name": webhook_name, "webhook_url": webhook_url}, + ) + assert response.status_code == 200, f"Failed to add webhook: {response.text}" + + client.post(url="/remove", data={"feed_url": feed_url}) + response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name}) + assert response.status_code == 200, f"Failed to add feed: {response.text}" + + response = client.post( + url="/whitelist", + data={ + "feed_url": feed_url, + "whitelist_author": "TheLovinator", + "apply_to_domain": "true", + }, + ) + assert response.status_code == 200, f"Failed to post whitelist: {response.text}" + + domain_whitelist = reader.get_tag((), "domain_whitelist", {}) + assert isinstance(domain_whitelist, dict), "domain_whitelist should be a dict" + assert "lovinator.space" in domain_whitelist, "Expected domain key in domain_whitelist" + assert domain_whitelist["lovinator.space"]["whitelist_author"] == "TheLovinator" + + +def test_domain_filter_pages_show_domain_enabled_notice() -> None: + """Blacklist and whitelist pages should show domain-wide enabled notices when configured.""" + reader: Reader = get_reader_dependency() + + client.post(url="/delete_webhook", data={"webhook_url": webhook_url}) + response: Response = client.post( + url="/add_webhook", + data={"webhook_name": webhook_name, "webhook_url": webhook_url}, + ) + assert response.status_code == 200, f"Failed to add webhook: {response.text}" + + client.post(url="/remove", data={"feed_url": feed_url}) + response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name}) + assert response.status_code == 200, f"Failed to add feed: {response.text}" + + reader.set_tag( + (), + "domain_blacklist", + {"lovinator.space": {"blacklist_title": "spoiler"}}, + ) # pyright: ignore[reportArgumentType] + reader.set_tag( + (), + "domain_whitelist", + {"lovinator.space": {"whitelist_title": "release"}}, + ) 
# pyright: ignore[reportArgumentType] + + response = client.get(url="/blacklist", params={"feed_url": encoded_feed_url(feed_url)}) + assert response.status_code == 200, f"/blacklist failed: {response.text}" + assert "Domain-wide blacklist is enabled for lovinator.space." in response.text + + response = client.get(url="/whitelist", params={"feed_url": encoded_feed_url(feed_url)}) + assert response.status_code == 200, f"/whitelist failed: {response.text}" + assert "Domain-wide whitelist is enabled for lovinator.space." in response.text + + +def test_domain_blacklist_isolation_between_domains() -> None: + """Applying domain blacklist should not overwrite other domains.""" + reader: Reader = get_reader_dependency() + reader.set_tag((), "domain_blacklist", {"example.com": {"blacklist_title": "existing"}}) # pyright: ignore[reportArgumentType] + + response = client.post( + url="/blacklist", + data={ + "feed_url": feed_url, + "blacklist_author": "TheLovinator", + "apply_to_domain": "true", + }, + ) + assert response.status_code == 200, f"Failed to post blacklist: {response.text}" + + domain_blacklist = reader.get_tag((), "domain_blacklist", {}) + assert isinstance(domain_blacklist, dict) + assert domain_blacklist["example.com"]["blacklist_title"] == "existing" + assert domain_blacklist["lovinator.space"]["blacklist_author"] == "TheLovinator" + + +def test_domain_whitelist_isolation_between_domains() -> None: + """Applying domain whitelist should not overwrite other domains.""" + reader: Reader = get_reader_dependency() + client.post(url="/delete_webhook", data={"webhook_url": webhook_url}) + response: Response = client.post( + url="/add_webhook", + data={"webhook_name": webhook_name, "webhook_url": webhook_url}, + ) + assert response.status_code == 200, f"Failed to add webhook: {response.text}" + client.post(url="/remove", data={"feed_url": feed_url}) + response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name}) + assert 
response.status_code == 200, f"Failed to add feed: {response.text}" + + reader.set_tag((), "domain_whitelist", {"example.com": {"whitelist_title": "existing"}}) # pyright: ignore[reportArgumentType] + + response = client.post( + url="/whitelist", + data={ + "feed_url": feed_url, + "whitelist_author": "TheLovinator", + "apply_to_domain": "true", + }, + ) + assert response.status_code == 200, f"Failed to post whitelist: {response.text}" + + domain_whitelist = reader.get_tag((), "domain_whitelist", {}) + assert isinstance(domain_whitelist, dict) + assert domain_whitelist["example.com"]["whitelist_title"] == "existing" + assert domain_whitelist["lovinator.space"]["whitelist_author"] == "TheLovinator" + + +def test_domain_blacklist_removed_when_apply_to_domain_and_empty_values() -> None: + """Submitting empty domain blacklist values should remove existing domain entry.""" + reader: Reader = get_reader_dependency() + client.post(url="/delete_webhook", data={"webhook_url": webhook_url}) + response: Response = client.post( + url="/add_webhook", + data={"webhook_name": webhook_name, "webhook_url": webhook_url}, + ) + assert response.status_code == 200, f"Failed to add webhook: {response.text}" + client.post(url="/remove", data={"feed_url": feed_url}) + response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name}) + assert response.status_code == 200, f"Failed to add feed: {response.text}" + + reader.set_tag((), "domain_blacklist", {"lovinator.space": {"blacklist_title": "existing"}}) # pyright: ignore[reportArgumentType] + + response = client.post( + url="/blacklist", + data={"feed_url": feed_url, "apply_to_domain": "true"}, + ) + assert response.status_code == 200, f"Failed to post blacklist: {response.text}" + + domain_blacklist = reader.get_tag((), "domain_blacklist", {}) + assert isinstance(domain_blacklist, dict) + assert "lovinator.space" not in domain_blacklist + + +def 
test_domain_whitelist_removed_when_apply_to_domain_and_empty_values() -> None: + """Submitting empty domain whitelist values should remove existing domain entry.""" + reader: Reader = get_reader_dependency() + reader.set_tag((), "domain_whitelist", {"lovinator.space": {"whitelist_title": "existing"}}) # pyright: ignore[reportArgumentType] + + response = client.post( + url="/whitelist", + data={"feed_url": feed_url, "apply_to_domain": "true"}, + ) + assert response.status_code == 200, f"Failed to post whitelist: {response.text}" + + domain_whitelist = reader.get_tag((), "domain_whitelist", {}) + assert isinstance(domain_whitelist, dict) + assert "lovinator.space" not in domain_whitelist + + +def test_apply_to_domain_missing_does_not_update_domain_tags() -> None: + """When apply_to_domain is omitted, domain tags should not change.""" + reader: Reader = get_reader_dependency() + client.post(url="/delete_webhook", data={"webhook_url": webhook_url}) + response: Response = client.post( + url="/add_webhook", + data={"webhook_name": webhook_name, "webhook_url": webhook_url}, + ) + assert response.status_code == 200, f"Failed to add webhook: {response.text}" + client.post(url="/remove", data={"feed_url": feed_url}) + response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name}) + assert response.status_code == 200, f"Failed to add feed: {response.text}" + + reader.set_tag((), "domain_blacklist", {}) # pyright: ignore[reportArgumentType] + reader.set_tag((), "domain_whitelist", {}) # pyright: ignore[reportArgumentType] + + response = client.post( + url="/blacklist", + data={"feed_url": feed_url, "blacklist_author": "TheLovinator"}, + ) + assert response.status_code == 200, f"Failed to post blacklist: {response.text}" + + response = client.post( + url="/whitelist", + data={"feed_url": feed_url, "whitelist_author": "TheLovinator"}, + ) + assert response.status_code == 200, f"Failed to post whitelist: {response.text}" + + assert 
reader.get_tag((), "domain_blacklist", {}) == {} + assert reader.get_tag((), "domain_whitelist", {}) == {} + + +def test_apply_to_domain_invalid_value_rejected() -> None: + """Invalid boolean value for apply_to_domain should return validation error.""" + response = client.post( + url="/blacklist", + data={ + "feed_url": feed_url, + "blacklist_author": "TheLovinator", + "apply_to_domain": "invalid-bool", + }, + ) + assert response.status_code == 422, f"Expected 422 for invalid boolean: {response.text}" + + +def test_index_shows_domain_filter_shortcuts() -> None: + """Index should show domain whitelist/blacklist shortcut buttons.""" + client.post(url="/delete_webhook", data={"webhook_url": webhook_url}) + response: Response = client.post( + url="/add_webhook", + data={"webhook_name": webhook_name, "webhook_url": webhook_url}, + ) + assert response.status_code == 200, f"Failed to add webhook: {response.text}" + + client.post(url="/remove", data={"feed_url": feed_url}) + response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name}) + assert response.status_code == 200, f"Failed to add feed: {response.text}" + + response = client.get(url="/") + assert response.status_code == 200, f"Failed to get /: {response.text}" + assert "Domain whitelist" in response.text + assert "Domain blacklist" in response.text + assert f"/whitelist?feed_url={encoded_feed_url(feed_url)}" in response.text + assert f"/blacklist?feed_url={encoded_feed_url(feed_url)}" in response.text + + def test_settings_page_shows_screenshot_layout_setting() -> None: response: Response = client.get(url="/settings") assert response.status_code == 200, f"/settings failed: {response.text}" diff --git a/tests/test_whitelist.py b/tests/test_whitelist.py index 6e911fe..8e7f272 100644 --- a/tests/test_whitelist.py +++ b/tests/test_whitelist.py @@ -184,3 +184,33 @@ def test_regex_should_be_sent() -> None: assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent 
with newline-separated patterns" reader.delete_tag(feed, "regex_whitelist_author") assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent" + + +def test_domain_whitelist_should_be_sent() -> None: + """Domain-wide whitelist should apply to feeds on the same domain.""" + reader: Reader = get_reader() + + reader.add_feed(feed_url) + feed: Feed = reader.get_feed(feed_url) + reader.update_feeds() + + entries: Iterable[Entry] = reader.get_entries(feed=feed) + first_entry: Entry | None = next(iter(entries), None) + assert first_entry is not None, "Expected at least one entry" + + assert has_white_tags(reader, feed) is False, "Feed should not have whitelist tags" + assert should_be_sent(reader, first_entry) is False, "Entry should not be sent" + + reader.set_tag( + (), + "domain_whitelist", + { + "lovinator.space": { + "whitelist_author": "TheLovinator", + "regex_whitelist_title": r"fvnnn\\w+", + }, + }, + ) # pyright: ignore[reportArgumentType] + + assert has_white_tags(reader, feed) is True, "Domain whitelist should count as whitelist tags" + assert should_be_sent(reader, first_entry) is True, "Entry should be sent by domain whitelist"