From 6a3bba5b698e40212f171206eb1ccbc7b9af5b3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Hells=C3=A9n?= Date: Mon, 27 Apr 2026 18:27:05 +0200 Subject: [PATCH] Add live preview to blacklist and whitelist --- discord_rss_bot/custom_filters.py | 11 +- discord_rss_bot/feeds.py | 15 +- discord_rss_bot/filter/blacklist.py | 84 +-- discord_rss_bot/filter/evaluator.py | 271 ++++++++++ discord_rss_bot/filter/utils.py | 16 +- discord_rss_bot/filter/whitelist.py | 70 +-- discord_rss_bot/main.py | 490 +++++++++++++++++- discord_rss_bot/static/styles.css | 124 +++++ .../templates/_filter_preview.html | 86 +++ discord_rss_bot/templates/blacklist.html | 206 ++++---- discord_rss_bot/templates/whitelist.html | 204 ++++---- tests/test_blacklist.py | 53 ++ tests/test_main.py | 224 ++++++++ tests/test_utils.py | 2 + tests/test_whitelist.py | 53 ++ 15 files changed, 1539 insertions(+), 370 deletions(-) create mode 100644 discord_rss_bot/filter/evaluator.py create mode 100644 discord_rss_bot/templates/_filter_preview.html diff --git a/discord_rss_bot/custom_filters.py b/discord_rss_bot/custom_filters.py index fd9461c..4e060f2 100644 --- a/discord_rss_bot/custom_filters.py +++ b/discord_rss_bot/custom_filters.py @@ -4,10 +4,7 @@ import urllib.parse from functools import lru_cache from typing import TYPE_CHECKING -from discord_rss_bot.filter.blacklist import entry_should_be_skipped -from discord_rss_bot.filter.blacklist import feed_has_blacklist_tags -from discord_rss_bot.filter.whitelist import has_white_tags -from discord_rss_bot.filter.whitelist import should_be_sent +from discord_rss_bot.filter.evaluator import get_entry_filter_decision_from_reader if TYPE_CHECKING: from reader import Entry @@ -41,7 +38,7 @@ def entry_is_whitelisted(entry_to_check: Entry, reader: Reader) -> bool: bool: True if the feed is whitelisted, False otherwise. """ - return bool(has_white_tags(reader, entry_to_check.feed) and should_be_sent(reader, entry_to_check)) + return get_entry_filter_decision_from_reader(reader, entry_to_check).whitelist_match is not None def entry_is_blacklisted(entry_to_check: Entry, reader: Reader) -> bool: @@ -55,6 +52,4 @@ def entry_is_blacklisted(entry_to_check: Entry, reader: Reader) -> bool: bool: True if the feed is blacklisted, False otherwise. """ - return bool( - feed_has_blacklist_tags(reader, entry_to_check.feed) and entry_should_be_skipped(reader, entry_to_check), - ) + return get_entry_filter_decision_from_reader(reader, entry_to_check).blacklist_match is not None diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py index 90c7af1..684e012 100644 --- a/discord_rss_bot/feeds.py +++ b/discord_rss_bot/feeds.py @@ -37,9 +37,7 @@ from discord_rss_bot.custom_message import CustomEmbed from discord_rss_bot.custom_message import get_custom_message from discord_rss_bot.custom_message import replace_tags_in_embed from discord_rss_bot.custom_message import replace_tags_in_text_message -from discord_rss_bot.filter.blacklist import entry_should_be_skipped -from discord_rss_bot.filter.whitelist import has_white_tags -from discord_rss_bot.filter.whitelist import should_be_sent +from discord_rss_bot.filter.evaluator import get_entry_filter_decision_from_reader from discord_rss_bot.hoyolab_api import create_hoyolab_webhook from discord_rss_bot.hoyolab_api import extract_post_id_from_hoyolab_url from discord_rss_bot.hoyolab_api import fetch_hoyolab_post @@ -711,14 +709,9 @@ def send_to_discord(reader: Reader | None = None, feed: Feed | None = None, *, d use_default_message_on_empty=True, ) - # Check if the entry is blacklisted, and if it is, we will skip it. - if entry_should_be_skipped(effective_reader, entry): - logger.info("Entry was blacklisted: %s", entry.id) - continue - - # Check if the feed has a whitelist, and if it does, check if the entry is whitelisted. - if has_white_tags(effective_reader, entry.feed) and not should_be_sent(effective_reader, entry): - logger.info("Entry was not whitelisted: %s", entry.id) + decision = get_entry_filter_decision_from_reader(effective_reader, entry) + if not decision.should_send: + logger.info("Entry was skipped: %s (%s)", entry.id, decision.reason) continue # Use a custom webhook for Hoyolab feeds. diff --git a/discord_rss_bot/filter/blacklist.py b/discord_rss_bot/filter/blacklist.py index 8260993..836b3b6 100644 --- a/discord_rss_bot/filter/blacklist.py +++ b/discord_rss_bot/filter/blacklist.py @@ -2,8 +2,9 @@ from __future__ import annotations from typing import TYPE_CHECKING -from discord_rss_bot.filter.utils import is_regex_match -from discord_rss_bot.filter.utils import is_word_in_text +from discord_rss_bot.filter.evaluator import find_filter_match +from discord_rss_bot.filter.evaluator import get_filter_values_from_reader +from discord_rss_bot.filter.evaluator import has_filter_values if TYPE_CHECKING: from reader import Entry @@ -31,29 +32,10 @@ def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool: Returns: bool: If the feed has any of the tags. """ - blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip() - blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip() - blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip() - blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip() - - regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip() - regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip() - regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip() - regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip() - - return bool( - blacklist_title - or blacklist_author - or blacklist_content - or blacklist_summary - or regex_blacklist_author - or regex_blacklist_content - or regex_blacklist_summary - or regex_blacklist_title, - ) + return has_filter_values(get_filter_values_from_reader(reader, feed, "blacklist")) -def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911 +def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: """Return True if the entry is in the blacklist. Args: @@ -63,58 +45,4 @@ def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0 Returns: bool: If the entry is in the blacklist. """ - feed = entry.feed - - blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip() - blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip() - blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip() - blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip() - - regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip() - regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip() - regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip() - regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip() - # TODO(TheLovinator): Also add support for entry_text and more. - - # Check regular blacklist - if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title): - return True - if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary): - return True - if ( - entry.content - and entry.content[0].value - and blacklist_content - and is_word_in_text(blacklist_content, entry.content[0].value) - ): - return True - if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author): - return True - if ( - entry.content - and entry.content[0].value - and blacklist_content - and is_word_in_text(blacklist_content, entry.content[0].value) - ): - return True - - # Check regex blacklist - if entry.title and regex_blacklist_title and is_regex_match(regex_blacklist_title, entry.title): - return True - if entry.summary and regex_blacklist_summary and is_regex_match(regex_blacklist_summary, entry.summary): - return True - if ( - entry.content - and entry.content[0].value - and regex_blacklist_content - and is_regex_match(regex_blacklist_content, entry.content[0].value) - ): - return True - if entry.author and regex_blacklist_author and is_regex_match(regex_blacklist_author, entry.author): - return True - return bool( - entry.content - and entry.content[0].value - and regex_blacklist_content - and is_regex_match(regex_blacklist_content, entry.content[0].value), - ) + return bool(find_filter_match(entry, get_filter_values_from_reader(reader, entry.feed, "blacklist"), "blacklist")) diff --git a/discord_rss_bot/filter/evaluator.py b/discord_rss_bot/filter/evaluator.py new file mode 100644 index 0000000..45ed7d7 --- /dev/null +++ b/discord_rss_bot/filter/evaluator.py @@ -0,0 +1,271 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from discord_rss_bot.filter.utils import is_regex_match +from discord_rss_bot.filter.utils import is_word_in_text + +if TYPE_CHECKING: + from collections.abc import Mapping + + from reader import Entry + from reader import Feed + from reader import Reader + + +FILTER_FIELDS: tuple[str, str, str, str] = ("title", "summary", "content", "author") +FilterValues = dict[str, str] + + +@dataclass(frozen=True, slots=True) +class FilterMatch: + filter_name: str + field_name: str + match_type: str + pattern: str + + @property + def description(self) -> str: + field_label: str = self.field_name.replace("_", " ") + return f"{self.filter_name} {self.match_type} match on {field_label}" + + +@dataclass(frozen=True, slots=True) +class EntryFilterDecision: + should_send: bool + reason: str + blacklist_match: FilterMatch | None + whitelist_match: FilterMatch | None + has_blacklist_filters: bool + has_whitelist_filters: bool + + +def get_filter_values_from_reader(reader: Reader, feed: Feed, filter_name: str) -> FilterValues: + """Return stripped filter tag values for a feed. + + Args: + reader: The reader instance. + feed: The feed whose filter tags should be loaded. + filter_name: Either blacklist or whitelist. + + Returns: + FilterValues: The current saved filter values. + """ + values: FilterValues = {} + for field_name in FILTER_FIELDS: + values[field_name] = str(reader.get_tag(feed, f"{filter_name}_{field_name}", "")).strip() + values[f"regex_{field_name}"] = str(reader.get_tag(feed, f"regex_{filter_name}_{field_name}", "")).strip() + return values + + +def coerce_filter_values(filter_name: str, values: Mapping[str, str] | None = None) -> FilterValues: + """Normalize incoming filter values from forms or tests. + + Args: + filter_name: Either blacklist or whitelist. + values: Optional raw mapping of form or saved values. + + Returns: + FilterValues: A normalized value mapping. + """ + source_values: Mapping[str, str] = values or {} + normalized_values: FilterValues = {} + for field_name in FILTER_FIELDS: + normalized_values[field_name] = str( + source_values.get(f"{filter_name}_{field_name}", source_values.get(field_name, "")), + ).strip() + normalized_values[f"regex_{field_name}"] = str( + source_values.get( + f"regex_{filter_name}_{field_name}", + source_values.get(f"regex_{field_name}", ""), + ), + ).strip() + return normalized_values + + +def has_filter_values(values: Mapping[str, str]) -> bool: + """Return whether any filter value is configured. + + Args: + values: Filter values to inspect. + + Returns: + bool: True when at least one value is non-empty. + """ + return any(str(value).strip() for value in values.values()) + + +def get_entry_filter_decision_from_reader(reader: Reader, entry: Entry) -> EntryFilterDecision: + """Evaluate an entry against its saved blacklist and whitelist tags. + + Args: + reader: The reader instance. + entry: The entry to evaluate. + + Returns: + EntryFilterDecision: Final decision plus match details. + """ + return evaluate_entry_filters( + entry, + blacklist_values=get_filter_values_from_reader(reader, entry.feed, "blacklist"), + whitelist_values=get_filter_values_from_reader(reader, entry.feed, "whitelist"), + ) + + +def evaluate_entry_filters( + entry: Entry, + *, + blacklist_values: Mapping[str, str] | None = None, + whitelist_values: Mapping[str, str] | None = None, +) -> EntryFilterDecision: + """Evaluate one entry against blacklist and whitelist settings. + + Whitelist matches take precedence over blacklist matches. + + Args: + entry: The entry to evaluate. + blacklist_values: Blacklist values from saved tags or a form. + whitelist_values: Whitelist values from saved tags or a form. + + Returns: + EntryFilterDecision: Final decision plus match details. + """ + normalized_blacklist_values: FilterValues = coerce_filter_values("blacklist", blacklist_values) + normalized_whitelist_values: FilterValues = coerce_filter_values("whitelist", whitelist_values) + + blacklist_match: FilterMatch | None = find_filter_match(entry, normalized_blacklist_values, "blacklist") + whitelist_match: FilterMatch | None = find_filter_match(entry, normalized_whitelist_values, "whitelist") + + has_blacklist_filters: bool = has_filter_values(normalized_blacklist_values) + has_whitelist_filters: bool = has_filter_values(normalized_whitelist_values) + + if whitelist_match and blacklist_match: + return EntryFilterDecision( + should_send=True, + reason=f"Sent because {whitelist_match.description}; whitelist overrides blacklist.", + blacklist_match=blacklist_match, + whitelist_match=whitelist_match, + has_blacklist_filters=has_blacklist_filters, + has_whitelist_filters=has_whitelist_filters, + ) + + if whitelist_match: + return EntryFilterDecision( + should_send=True, + reason=f"Sent because {whitelist_match.description}.", + blacklist_match=blacklist_match, + whitelist_match=whitelist_match, + has_blacklist_filters=has_blacklist_filters, + has_whitelist_filters=has_whitelist_filters, + ) + + if has_whitelist_filters and blacklist_match: + return EntryFilterDecision( + should_send=False, + reason=f"Skipped because {blacklist_match.description} and no whitelist rule matched.", + blacklist_match=blacklist_match, + whitelist_match=whitelist_match, + has_blacklist_filters=has_blacklist_filters, + has_whitelist_filters=has_whitelist_filters, + ) + + if has_whitelist_filters: + return EntryFilterDecision( + should_send=False, + reason="Skipped because no whitelist rule matched.", + blacklist_match=blacklist_match, + whitelist_match=whitelist_match, + has_blacklist_filters=has_blacklist_filters, + has_whitelist_filters=has_whitelist_filters, + ) + + if blacklist_match: + return EntryFilterDecision( + should_send=False, + reason=f"Skipped because {blacklist_match.description}.", + blacklist_match=blacklist_match, + whitelist_match=whitelist_match, + has_blacklist_filters=has_blacklist_filters, + has_whitelist_filters=has_whitelist_filters, + ) + + return EntryFilterDecision( + should_send=True, + reason="Sent because no active filter blocked it.", + blacklist_match=blacklist_match, + whitelist_match=whitelist_match, + has_blacklist_filters=has_blacklist_filters, + has_whitelist_filters=has_whitelist_filters, + ) + + +def find_filter_match(entry: Entry, values: Mapping[str, str], filter_name: str) -> FilterMatch | None: + """Return the first matching filter rule for an entry. + + Args: + entry: The entry to evaluate. + values: Normalized filter values. + filter_name: Either blacklist or whitelist. + + Returns: + FilterMatch | None: The first matching rule, if any. + """ + entry_fields: dict[str, str] = get_entry_fields(entry) + + for field_name in FILTER_FIELDS: + pattern: str = str(values.get(field_name, "")).strip() + field_text: str = entry_fields[field_name] + if pattern and field_text and is_word_in_text(pattern, field_text): + return FilterMatch( + filter_name=filter_name, + field_name=field_name, + match_type="text", + pattern=pattern, + ) + + for field_name in FILTER_FIELDS: + pattern = str(values.get(f"regex_{field_name}", "")).strip() + field_text = entry_fields[field_name] + if pattern and field_text and is_regex_match(pattern, field_text): + return FilterMatch( + filter_name=filter_name, + field_name=field_name, + match_type="regex", + pattern=pattern, + ) + + return None + + +def get_entry_fields(entry: Entry) -> dict[str, str]: + """Return the entry fields used during filter matching. + + Args: + entry: The entry to inspect. + + Returns: + dict[str, str]: The fields used by filter evaluation. + """ + content_value: str = "" + if entry.content and entry.content[0].value: + content_value = entry.content[0].value + + return { + "title": entry.title or "", + "summary": entry.summary or "", + "content": content_value, + "author": entry.author or "", + } + + +def get_entry_decision_key(entry: Entry) -> str: + """Return a stable key for mapping preview decisions to entries. + + Args: + entry: The entry to key. + + Returns: + str: A stable key based on feed URL and entry id. + """ + return f"{entry.feed.url}|{entry.id}" diff --git a/discord_rss_bot/filter/utils.py b/discord_rss_bot/filter/utils.py index ff93e59..7bbcce3 100644 --- a/discord_rss_bot/filter/utils.py +++ b/discord_rss_bot/filter/utils.py @@ -7,22 +7,22 @@ logger: logging.Logger = logging.getLogger(__name__) def is_word_in_text(word_string: str, text: str) -> bool: - """Check if any of the words are in the text. + """Check if any comma-separated terms are in the text. Args: - word_string: A comma-separated string of words to search for. + word_string: A comma-separated string of terms to search for. text: The text to search in. Returns: - bool: True if any word is found in the text, otherwise False. + bool: True if any term is found in the text, otherwise False. """ - word_list: list[str] = word_string.split(",") + if not word_string or not text: + return False - # Compile regex patterns for each word. - patterns: list[re.Pattern[str]] = [re.compile(rf"(^|[^\w]){word}([^\w]|$)", re.IGNORECASE) for word in word_list] + normalized_text: str = text.casefold() + terms: list[str] = [term.strip().casefold() for term in word_string.split(",") if term.strip()] - # Check if any pattern matches the text. - return any(pattern.search(text) for pattern in patterns) + return any(term in normalized_text for term in terms) def is_regex_match(regex_string: str, text: str) -> bool: diff --git a/discord_rss_bot/filter/whitelist.py b/discord_rss_bot/filter/whitelist.py index bb5303d..956c659 100644 --- a/discord_rss_bot/filter/whitelist.py +++ b/discord_rss_bot/filter/whitelist.py @@ -2,8 +2,9 @@ from __future__ import annotations from typing import TYPE_CHECKING -from discord_rss_bot.filter.utils import is_regex_match -from discord_rss_bot.filter.utils import is_word_in_text +from discord_rss_bot.filter.evaluator import find_filter_match +from discord_rss_bot.filter.evaluator import get_filter_values_from_reader +from discord_rss_bot.filter.evaluator import has_filter_values if TYPE_CHECKING: from reader import Entry @@ -31,29 +32,10 @@ def has_white_tags(reader: Reader, feed: Feed) -> bool: Returns: bool: If the feed has any of the tags. """ - whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip() - whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip() - whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip() - whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip() - - regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip() - regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip() - regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip() - regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip() - - return bool( - whitelist_title - or whitelist_author - or whitelist_content - or whitelist_summary - or regex_whitelist_author - or regex_whitelist_content - or regex_whitelist_summary - or regex_whitelist_title, - ) + return has_filter_values(get_filter_values_from_reader(reader, feed, "whitelist")) -def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911 +def should_be_sent(reader: Reader, entry: Entry) -> bool: """Return True if the entry is in the whitelist. Args: @@ -63,44 +45,4 @@ def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911 Returns: bool: If the entry is in the whitelist. """ - feed: Feed = entry.feed - # Regular whitelist tags - whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip() - whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip() - whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip() - whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip() - - # Regex whitelist tags - regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip() - regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip() - regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip() - regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip() - - # Check regular whitelist - if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title): - return True - if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary): - return True - if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author): - return True - if ( - entry.content - and entry.content[0].value - and whitelist_content - and is_word_in_text(whitelist_content, entry.content[0].value) - ): - return True - - # Check regex whitelist - if entry.title and regex_whitelist_title and is_regex_match(regex_whitelist_title, entry.title): - return True - if entry.summary and regex_whitelist_summary and is_regex_match(regex_whitelist_summary, entry.summary): - return True - if entry.author and regex_whitelist_author and is_regex_match(regex_whitelist_author, entry.author): - return True - return bool( - entry.content - and entry.content[0].value - and regex_whitelist_content - and is_regex_match(regex_whitelist_content, entry.content[0].value), - ) + return bool(find_filter_match(entry, get_filter_values_from_reader(reader, entry.feed, "whitelist"), "whitelist")) diff --git a/discord_rss_bot/main.py b/discord_rss_bot/main.py index 961c70e..d639d27 100644 --- a/discord_rss_bot/main.py +++ b/discord_rss_bot/main.py @@ -3,6 +3,7 @@ from __future__ import annotations import json import logging import logging.config +import re import typing import urllib.parse from contextlib import asynccontextmanager @@ -10,6 +11,8 @@ from dataclasses import dataclass from datetime import UTC from datetime import datetime from functools import lru_cache +from html import escape +from html import unescape from typing import TYPE_CHECKING from typing import Annotated from typing import Any @@ -54,6 +57,14 @@ from discord_rss_bot.feeds import get_feed_delivery_mode from discord_rss_bot.feeds import get_screenshot_layout from discord_rss_bot.feeds import send_entry_to_discord from discord_rss_bot.feeds import send_to_discord +from discord_rss_bot.filter.evaluator import FILTER_FIELDS +from discord_rss_bot.filter.evaluator import EntryFilterDecision +from discord_rss_bot.filter.evaluator import FilterMatch +from discord_rss_bot.filter.evaluator import coerce_filter_values +from discord_rss_bot.filter.evaluator import evaluate_entry_filters +from discord_rss_bot.filter.evaluator import get_entry_decision_key +from discord_rss_bot.filter.evaluator import get_entry_fields +from discord_rss_bot.filter.evaluator import get_filter_values_from_reader from discord_rss_bot.git_backup import commit_state_change from discord_rss_bot.git_backup import get_backup_path from discord_rss_bot.is_url_valid import is_url_valid @@ -125,6 +136,15 @@ def has_webhooks() -> bool: SECONDS_PER_MINUTE = 60 SECONDS_PER_HOUR = 3600 SECONDS_PER_DAY = 86400 +FILTER_PREVIEW_LIMIT = 50 +PREVIEW_FIELD_LABELS: dict[str, str] = { + "title": "Title", + "author": "Author", + "summary": "Description", + "content": "Content", +} +PREVIEW_HTML_TAG_PATTERN = re.compile(r"<[^>]+>") +PREVIEW_WHITESPACE_PATTERN = re.compile(r"\s+") def relative_time(dt: datetime | None) -> str: @@ -459,19 +479,50 @@ async def get_whitelist( """ clean_feed_url: str = feed_url.strip() feed: Feed = reader.get_feed(urllib.parse.unquote(clean_feed_url)) - - whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")) - whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")) - whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")) - whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")) - regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")) - regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")) - regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")) - regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")) - context = { "request": request, "feed": feed, + **build_filter_form_context("whitelist", get_filter_values_from_reader(reader, feed, "whitelist")), + **build_filter_preview_context(reader, feed, "whitelist"), + } + return templates.TemplateResponse(request=request, name="whitelist.html", context=context) + + +@app.get("/whitelist_preview", response_class=HTMLResponse) +async def get_whitelist_preview( + feed_url: str, + request: Request, + reader: Annotated[Reader, Depends(get_reader_dependency)], + whitelist_title: str = "", + whitelist_summary: str = "", + whitelist_content: str = "", + whitelist_author: str = "", + regex_whitelist_title: str = "", + regex_whitelist_summary: str = "", + regex_whitelist_content: str = "", + regex_whitelist_author: str = "", +) -> HTMLResponse: + """Render the whitelist preview fragment for HTMX updates. + + Args: + feed_url: Feed URL whose entries should be previewed. + request: The request object. + reader: The Reader instance. + whitelist_title: Word-based title whitelist. + whitelist_summary: Word-based summary whitelist. + whitelist_content: Word-based content whitelist. + whitelist_author: Word-based author whitelist. + regex_whitelist_title: Regex title whitelist. + regex_whitelist_summary: Regex summary whitelist. + regex_whitelist_content: Regex content whitelist. + regex_whitelist_author: Regex author whitelist. + + Returns: + HTMLResponse: Rendered filter preview fragment. + """ + clean_feed_url: str = urllib.parse.unquote(feed_url.strip()) + feed: Feed = reader.get_feed(clean_feed_url) + form_values: dict[str, str] = { "whitelist_title": whitelist_title, "whitelist_summary": whitelist_summary, "whitelist_content": whitelist_content, @@ -481,7 +532,16 @@ async def get_whitelist( "regex_whitelist_content": regex_whitelist_content, "regex_whitelist_author": regex_whitelist_author, } - return templates.TemplateResponse(request=request, name="whitelist.html", context=context) + + return templates.TemplateResponse( + request=request, + name="_filter_preview.html", + context={ + "request": request, + "feed": feed, + **build_filter_preview_context(reader, feed, "whitelist", form_values=form_values), + }, + ) @app.post("/blacklist") @@ -548,18 +608,50 @@ async def get_blacklist( """ feed: Feed = reader.get_feed(urllib.parse.unquote(feed_url)) - blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")) - blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")) - blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")) - blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")) - regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")) - regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")) - regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")) - regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")) - context = { "request": request, "feed": feed, + **build_filter_form_context("blacklist", get_filter_values_from_reader(reader, feed, "blacklist")), + **build_filter_preview_context(reader, feed, "blacklist"), + } + return templates.TemplateResponse(request=request, name="blacklist.html", context=context) + + +@app.get("/blacklist_preview", response_class=HTMLResponse) +async def get_blacklist_preview( + feed_url: str, + request: Request, + reader: Annotated[Reader, Depends(get_reader_dependency)], + blacklist_title: str = "", + blacklist_summary: str = "", + blacklist_content: str = "", + blacklist_author: str = "", + regex_blacklist_title: str = "", + regex_blacklist_summary: str = "", + regex_blacklist_content: str = "", + regex_blacklist_author: str = "", +) -> HTMLResponse: + """Render the blacklist preview fragment for HTMX updates. + + Args: + feed_url: Feed URL whose entries should be previewed. + request: The request object. + reader: The Reader instance. + blacklist_title: Word-based title blacklist. + blacklist_summary: Word-based summary blacklist. + blacklist_content: Word-based content blacklist. + blacklist_author: Word-based author blacklist. + regex_blacklist_title: Regex title blacklist. + regex_blacklist_summary: Regex summary blacklist. + regex_blacklist_content: Regex content blacklist. + regex_blacklist_author: Regex author blacklist. + + Returns: + HTMLResponse: Rendered filter preview fragment. + """ + clean_feed_url: str = urllib.parse.unquote(feed_url.strip()) + feed: Feed = reader.get_feed(clean_feed_url) + form_values: dict[str, str] = { "blacklist_title": blacklist_title, "blacklist_summary": blacklist_summary, "blacklist_content": blacklist_content, @@ -569,7 +661,347 @@ async def get_blacklist( "regex_blacklist_content": regex_blacklist_content, "regex_blacklist_author": regex_blacklist_author, } - return templates.TemplateResponse(request=request, name="blacklist.html", context=context) + + return templates.TemplateResponse( + request=request, + name="_filter_preview.html", + context={ + "request": request, + "feed": feed, + **build_filter_preview_context(reader, feed, "blacklist", form_values=form_values), + }, + ) + + +def build_filter_form_context(filter_name: str, values: dict[str, str]) -> dict[str, str]: + """Return template context keys for a filter form. + + Args: + filter_name: Either blacklist or whitelist. + values: Normalized filter values. + + Returns: + dict[str, str]: Template keys matching current form field names. + """ + context: dict[str, str] = {} + for field_name in FILTER_FIELDS: + context[f"{filter_name}_{field_name}"] = values[field_name] + context[f"regex_{filter_name}_{field_name}"] = values[f"regex_{field_name}"] + return context + + +def build_filter_preview_context( + reader: Reader, + feed: Feed, + filter_name: str, + form_values: dict[str, str] | None = None, +) -> dict[str, Any]: + """Build preview data for the blacklist and whitelist pages. + + Args: + reader: The Reader instance. + feed: The feed being previewed. + filter_name: Either blacklist or whitelist. + form_values: Optional unsaved values from the current form. + + Returns: + dict[str, Any]: Preview context for template rendering. + """ + saved_blacklist_values: dict[str, str] = get_filter_values_from_reader(reader, feed, "blacklist") + saved_whitelist_values: dict[str, str] = get_filter_values_from_reader(reader, feed, "whitelist") + + preview_blacklist_values: dict[str, str] = saved_blacklist_values + preview_whitelist_values: dict[str, str] = saved_whitelist_values + helper_text: str = "Saved whitelist rules still apply while previewing blacklist changes." + + if filter_name == "blacklist": + preview_blacklist_values = coerce_filter_values("blacklist", form_values) + else: + preview_whitelist_values = coerce_filter_values("whitelist", form_values) + helper_text = "Saved blacklist rules still apply while previewing whitelist changes." + + preview_entries: list[Entry] = list(reader.get_entries(feed=feed, limit=FILTER_PREVIEW_LIMIT)) + preview_rows: list[dict[str, Any]] = [] + preview_decisions: dict[str, EntryFilterDecision] = {} + sent_count = 0 + skipped_count = 0 + blacklist_match_count = 0 + whitelist_match_count = 0 + + for entry in preview_entries: + decision: EntryFilterDecision = evaluate_entry_filters( + entry, + blacklist_values=preview_blacklist_values, + whitelist_values=preview_whitelist_values, + ) + preview_decisions[get_entry_decision_key(entry)] = decision + + if decision.should_send: + sent_count += 1 + else: + skipped_count += 1 + + if decision.blacklist_match: + blacklist_match_count += 1 + if decision.whitelist_match: + whitelist_match_count += 1 + + published_label: str = "Unknown date" + if entry.published: + published_label = entry.published.strftime("%Y-%m-%d %H:%M:%S") + + preview_rows.append( + { + "entry": entry, + "decision": decision, + "field_rows": build_preview_field_rows(entry, decision), + "published_label": published_label, + "status_label": "Sent" if decision.should_send else "Skipped", + "status_class": "success" if decision.should_send else "danger", + }, + ) + + preview_html: str = create_html_for_feed( + reader=reader, + entries=preview_entries, + current_feed_url=feed.url, + entry_decisions=preview_decisions, + ) + + return { + "filter_name": filter_name, + "filter_label": filter_name.title(), + "preview_entries": preview_entries, + "preview_rows": preview_rows, + "preview_html": preview_html, + "preview_limit": FILTER_PREVIEW_LIMIT, + "preview_summary": { + "total": len(preview_entries), + "sent": sent_count, + "skipped": skipped_count, + "blacklist_matches": blacklist_match_count, + "whitelist_matches": whitelist_match_count, + }, + "preview_helper_text": helper_text, + } + + +def build_preview_field_rows(entry: Entry, decision: EntryFilterDecision) -> list[dict[str, Any]]: + """Build labeled preview fields for the filter UI. + + Args: + entry: Entry whose values should be shown. + decision: The final decision for the entry. + + Returns: + list[dict[str, Any]]: Labeled field rows for the preview template. + """ + entry_fields: dict[str, str] = get_entry_fields(entry) + field_rows: list[dict[str, Any]] = [] + + for field_name in ("title", "author", "summary", "content"): + badges: list[dict[str, str]] = [] + matches: list[tuple[FilterMatch, str]] = [] + if decision.blacklist_match and decision.blacklist_match.field_name == field_name: + badges.append({"label": "Blacklist match", "class": "danger"}) + matches.append((decision.blacklist_match, "danger")) + if decision.whitelist_match and decision.whitelist_match.field_name == field_name: + badges.append({"label": "Whitelist match", "class": "success"}) + matches.append((decision.whitelist_match, "success")) + + field_rows.append( + { + "label": PREVIEW_FIELD_LABELS[field_name], + "value_html": format_preview_field_value(entry_fields[field_name], matches), + "badges": badges, + }, + ) + + return field_rows + + +def format_preview_field_value( + value: str, + matches: list[tuple[FilterMatch, str]], + max_length: int = 280, +) -> str: + """Convert entry field content into readable preview text with highlight markup. + + Args: + value: Raw field value from the entry. + matches: Matching filters for this field and their display classes. + max_length: Max number of characters to display. + + Returns: + str: Normalized preview HTML. + """ + normalized_value: str = normalize_preview_field_value(value) + if not normalized_value: + return "No value" + + highlighted_span, highlight_class = get_preview_highlight_span(normalized_value, matches) + clipped_value, clipped_span = clip_preview_value(normalized_value, highlighted_span, max_length) + + if clipped_span is None or highlight_class is None: + return escape(clipped_value) + + start, end = clipped_span + return "".join( + [ + escape(clipped_value[:start]), + f'', + escape(clipped_value[start:end]), + "", + escape(clipped_value[end:]), + ], + ) + + +def normalize_preview_field_value(value: str) -> str: + """Convert entry field content into readable plain text. + + Args: + value: Raw field value. + + Returns: + str: Plain-text preview value. + """ + if not value: + return "" + + plain_text: str = PREVIEW_HTML_TAG_PATTERN.sub(" ", value) + return PREVIEW_WHITESPACE_PATTERN.sub(" ", unescape(plain_text)).strip() + + +def get_preview_highlight_span( + value: str, + matches: list[tuple[FilterMatch, str]], +) -> tuple[tuple[int, int] | None, str | None]: + """Return the earliest highlight span for the preview field. + + Args: + value: Normalized field value. + matches: Matching filters and associated preview classes. + + Returns: + tuple[tuple[int, int] | None, str | None]: Span and highlight class. + """ + first_span: tuple[int, int] | None = None + first_class: str | None = None + + for match, highlight_class in matches: + span = get_filter_match_span(value, match) + if span is None: + continue + if first_span is None or span[0] < first_span[0]: + first_span = span + first_class = highlight_class + + return first_span, first_class + + +def get_filter_match_span(value: str, match: FilterMatch) -> tuple[int, int] | None: + """Return the matched substring span for a preview field. + + Args: + value: Normalized preview value. + match: Matching filter metadata. + + Returns: + tuple[int, int] | None: The first matching span if found. + """ + if match.match_type == "regex": + return get_regex_match_span(value, match.pattern) + return get_text_match_span(value, match.pattern) + + +def get_text_match_span(value: str, pattern: str) -> tuple[int, int] | None: + """Return the earliest case-insensitive substring span for comma-separated text terms.""" + earliest_span: tuple[int, int] | None = None + for term in [part.strip() for part in pattern.split(",") if part.strip()]: + compiled_pattern = re.compile(re.escape(term), re.IGNORECASE) + match = compiled_pattern.search(value) + if match and (earliest_span is None or match.start() < earliest_span[0]): + earliest_span = match.span() + return earliest_span + + +def get_regex_match_span(value: str, pattern: str) -> tuple[int, int] | None: + """Return the earliest regex match span for newline/comma-separated patterns.""" + earliest_span: tuple[int, int] | None = None + for pattern_str in split_regex_patterns(pattern): + try: + compiled_pattern = re.compile(pattern_str, re.IGNORECASE) + except re.error: + continue + + match = compiled_pattern.search(value) + if match and match.start() != match.end(): + current_span = match.span() + if earliest_span is None or current_span[0] < earliest_span[0]: + earliest_span = current_span + return earliest_span + + +def split_regex_patterns(pattern: str) -> list[str]: + """Split regex filter text using the same newline/comma semantics as the matcher. + + Args: + pattern: The raw regex pattern string. + + Returns: + list[str]: A list of individual regex patterns. + """ + regex_patterns: list[str] = [] + for line in pattern.split("\n"): + stripped_line = line.strip() + if not stripped_line: + continue + if "," in stripped_line: + regex_patterns.extend([part.strip() for part in stripped_line.split(",") if part.strip()]) + else: + regex_patterns.append(stripped_line) + return regex_patterns + + +def clip_preview_value( + value: str, + highlight_span: tuple[int, int] | None, + max_length: int, +) -> tuple[str, tuple[int, int] | None]: + """Clip a preview value while keeping the highlighted match visible when possible. + + Args: + value: The normalized preview value. + highlight_span: The span of the highlighted match within the value. + max_length: The maximum length of the clipped value. + + Returns: + tuple[str, tuple[int, int] | None]: The clipped preview value and adjusted highlight + """ + if len(value) <= max_length: + return value, highlight_span + + if highlight_span is None: + return f"{value[: max_length - 1].rstrip()}…", None + + match_start, match_end = highlight_span + window_start = max(0, match_start - (max_length // 3)) + window_end = min(len(value), window_start + max_length) + if match_end > window_end: + window_end = min(len(value), match_end + (max_length // 3)) + window_start = max(0, window_end - max_length) + + clipped_value = value[window_start:window_end] + clipped_span = (match_start - window_start, match_end - window_start) + + if window_start > 0: + clipped_value = f"…{clipped_value}" + clipped_span = (clipped_span[0] + 1, clipped_span[1] + 1) + if window_end < len(value): + clipped_value = f"{clipped_value}…" + + return clipped_value, clipped_span @app.post("/custom") @@ -1239,6 +1671,7 @@ def create_html_for_feed( # noqa: C901, PLR0914 reader: Reader, entries: Iterable[Entry], current_feed_url: str = "", + entry_decisions: dict[str, EntryFilterDecision] | None = None, ) -> str: """Create HTML for the search results. @@ -1246,6 +1679,7 @@ def create_html_for_feed( # noqa: C901, PLR0914 reader: The Reader instance to use. entries: The entries to create HTML for. current_feed_url: The feed URL currently being viewed in /feed. + entry_decisions: Optional preview decisions keyed by feed URL and entry id. Returns: str: The HTML for the search results. @@ -1268,12 +1702,22 @@ def create_html_for_feed( # noqa: C901, PLR0914 if entry.published: published: str = entry.published.strftime("%Y-%m-%d %H:%M:%S") + decision: EntryFilterDecision | None = None + if entry_decisions is not None: + decision = entry_decisions.get(get_entry_decision_key(entry)) + + is_blacklisted: bool = entry_is_blacklisted(entry, reader=reader) + is_whitelisted: bool = entry_is_whitelisted(entry, reader=reader) + if decision is not None: + is_blacklisted = decision.blacklist_match is not None + is_whitelisted = decision.whitelist_match is not None + blacklisted: str = "" - if entry_is_blacklisted(entry, reader=reader): + if is_blacklisted: blacklisted = "Blacklisted" whitelisted: str = "" - if entry_is_whitelisted(entry, reader=reader): + if is_whitelisted: whitelisted = "Whitelisted" source_feed_url: str = getattr(entry, "original_feed_url", None) or entry.feed.url diff --git a/discord_rss_bot/static/styles.css b/discord_rss_bot/static/styles.css index 9dde06f..f6776b5 100644 --- a/discord_rss_bot/static/styles.css +++ b/discord_rss_bot/static/styles.css @@ -64,3 +64,127 @@ body { overflow-wrap: anywhere; word-break: break-word; } + +.filter-page__sidebar { + height: 100%; +} + +.filter-page__example { + white-space: pre-wrap; + overflow-wrap: anywhere; + color: #d8d8d8; + font-size: 0.9rem; +} + +.filter-preview__list { + max-height: 48vh; + overflow-y: auto; + padding-right: 0.25rem; +} + +.filter-preview__content { + min-width: 0; +} + +.filter-preview__link { + overflow-wrap: anywhere; +} + +.filter-preview__status { + min-width: 5.5rem; +} + +.filter-preview__pattern { + display: inline-flex; + align-items: center; + padding: 0.15rem 0.5rem; + border: 1px solid #2f2f2f; + background: #1b1b1b; + color: #d7d7d7; + overflow-wrap: anywhere; +} + +.filter-preview__rendered { + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.filter-preview__rendered>div { + margin-bottom: 0; +} + +.filter-preview__field-table { + display: flex; + flex-direction: column; + gap: 0; + border: 1px solid #2f2f2f; + background: #1b1b1b; +} + +.filter-preview__field-row { + display: grid; + grid-template-columns: minmax(5.5rem, 6.75rem) minmax(0, 1fr) auto; + gap: 0.75rem; + align-items: start; + padding: 0.55rem 0.75rem; + border-bottom: 1px solid #2a2a2a; +} + +.filter-preview__field-row:last-child { + border-bottom: 0; +} + +.filter-preview__field-name { + color: #d8d8d8; + font-size: 0.78rem; + font-weight: 600; + letter-spacing: 0.04em; + text-transform: uppercase; +} + +.filter-preview__field-value { + color: #bfbfbf; + overflow-wrap: anywhere; + word-break: break-word; +} + +.filter-preview__field-badges { + display: flex; + flex-wrap: wrap; + justify-content: flex-end; + gap: 0.25rem; +} + +.filter-preview__match { + padding: 0 0.15rem; + border-radius: 0.15rem; +} + +.filter-preview__match--danger { + background: #652020; + color: #ffe2e2; +} + +.filter-preview__match--success { + background: #1e5330; + color: #def8e5; +} + +@media (max-width: 767.98px) { + .filter-preview__field-row { + grid-template-columns: 1fr; + gap: 0.35rem; + } + + .filter-preview__field-badges { + justify-content: flex-start; + } +} + +@media (min-width: 992px) { + .filter-page__sidebar { + position: sticky; + top: 1rem; + } +} diff --git a/discord_rss_bot/templates/_filter_preview.html b/discord_rss_bot/templates/_filter_preview.html new file mode 100644 index 0000000..dccebd7 --- /dev/null +++ b/discord_rss_bot/templates/_filter_preview.html @@ -0,0 +1,86 @@ +
+
+
+

Live preview

+

Latest {{ preview_limit }} entries from {{ feed.title or feed.url }}

+
+
+ {{ preview_summary.total }} checked + {{ preview_summary.sent }} sent + {{ preview_summary.skipped }} skipped + {{ preview_summary.blacklist_matches }} blacklist match{{ 'es' if preview_summary.blacklist_matches != 1 else '' }} + {{ preview_summary.whitelist_matches }} whitelist match{{ 'es' if preview_summary.whitelist_matches != 1 else '' }} +
+
+

{{ preview_helper_text }}

+
+
+

Decision list

+ Updates as you type. Saving is still manual. +
+ {% if preview_rows %} +
+ {% for row in preview_rows %} +
+
+
+
+ {% if row.entry.link %} + {{ row.entry.title or row.entry.id }} + {% else %} + {{ row.entry.title or row.entry.id }} + {% endif %} +
+

+ {% if row.entry.author %}By {{ row.entry.author }} |{% endif %} + {{ row.published_label }} +

+
+ {{ row.status_label }} +
+

{{ row.decision.reason }}

+
+ {% if row.decision.blacklist_match %} + {{ row.decision.blacklist_match.description }} + {{ row.decision.blacklist_match.pattern }} + {% endif %} + {% if row.decision.whitelist_match %} + {{ row.decision.whitelist_match.description }} + {{ row.decision.whitelist_match.pattern }} + {% endif %} +
+
+ {% for field in row.field_rows %} +
+
{{ field.label }}
+
{{ field.value_html|safe }}
+
+ {% for badge in field.badges %}{{ badge.label }}{% endfor %} +
+
+ {% endfor %} +
+
+ {% endfor %} +
+ {% else %} +
+

No entries are available yet for this feed, so there is nothing to preview.

+
+ {% endif %} +
+
+
+

Rendered entries

+ Uses the same entry rendering as the feed page. +
+ {% if preview_html %} +
{{ preview_html|safe }}
+ {% else %} +
+

Rendered preview will appear here when entries are available.

+
+ {% endif %} +
+
diff --git a/discord_rss_bot/templates/blacklist.html b/discord_rss_bot/templates/blacklist.html index ec16bce..54e1220 100644 --- a/discord_rss_bot/templates/blacklist.html +++ b/discord_rss_bot/templates/blacklist.html @@ -1,98 +1,126 @@ {% extends "base.html" %} {% block title %} -| Blacklist + | Blacklist {% endblock title %} {% block content %} -
-
- -
-
-
-
    -
  • - Comma separated list of words to blacklist. If a word is found in the - corresponding blacklists, the feed will not be sent. -
  • -
  • Whitelist always takes precedence over blacklist. Leave empty to disable.
  • -
  • Words are case-insensitive. No spaces should be used before or after the comma.
  • -
  • - Correct: - - primogem,events,gameplay preview,special program - -
  • -
  • - Wrong: - - primogem, events, gameplay preview, special program - -
  • -
-
- - - - - - - - - -
-
-
    -
  • - Regular expression patterns for advanced filtering. Each pattern should be on a new - line. -
  • -
  • Patterns are case-insensitive.
  • -
  • - Examples: - -
    -^New Release:.*
    -\b(update|version|patch)\s+\d+\.\d+
    -.*\[(important|notice)\].*
    -
    -
    -
  • -
+
+
+
+
+
+

Blacklist Rules

+

+ Build block rules on the left and watch the latest feed entries update on the right before you save. +

+
+

+ Use comma-separated terms or snippets for quick blocking. Use regex when the pattern is more specific. +

+

+ Plain text matching is case-insensitive and partial, so orld matches World of Warcraft. +

+

Whitelist matches still win. If an entry matches both, the preview keeps it as sent.

+

Keep the left side for editing and the right side for checking what gets removed.

+
- - - - - - - - - - - + + +
+

Word Rules

+
+

Comma separated terms or snippets. Spaces around commas are ignored.

+

+ Example: + primogem,events,orld,special program +

+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+

Regex Rules

+
+

One pattern per line. Matching is case-insensitive.

+
^New Release:.*
+\b(update|version|patch)\s+\d+\.\d+
+.*\[(important|notice)\].*
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + Back to feed +
+
-
+
- - - -
- +
+
+
+
{% include "_filter_preview.html" %}
+
+
- -
+
{% endblock content %} diff --git a/discord_rss_bot/templates/whitelist.html b/discord_rss_bot/templates/whitelist.html index 61755e2..fd27eb6 100644 --- a/discord_rss_bot/templates/whitelist.html +++ b/discord_rss_bot/templates/whitelist.html @@ -1,98 +1,124 @@ {% extends "base.html" %} {% block title %} -| Whitelist + | Whitelist {% endblock title %} {% block content %} -
-
- -
-
-
-
    -
  • - Comma separated list of words to whitelist. Only send message to - Discord if one of these words are present in the corresponding fields. -
  • -
  • Whitelist always takes precedence over blacklist. Leave empty to disable.
  • -
  • Words are case-insensitive. No spaces should be used before or after the comma.
  • -
  • - Correct: - - primogem,events,gameplay preview,special program - -
  • -
  • - Wrong: - - primogem, events, gameplay preview, special program - -
  • -
-
- - - - - - - - - -
-
-
    -
  • - Regular expression patterns for advanced filtering. Each pattern should be on a new - line. -
  • -
  • Patterns are case-insensitive.
  • -
  • - Examples: - -
    -^New Release:.*
    -\b(update|version|patch)\s+\d+\.\d+
    -.*\[(important|notice)\].*
    -
    -
    -
  • -
+
+
+
+
+
+

Whitelist Rules

+

+ Shape what is allowed through, and use the live pane to see which entries are the only ones that will still be sent. +

+
+

Whitelist rules are restrictive. If any whitelist rule exists, entries must match it to be sent.

+

+ Plain text matching is case-insensitive and partial, so orld matches World of Warcraft. +

+

When an entry matches both lists, whitelist still wins and the preview shows it as sent.

+

Saved blacklist rules remain active while you preview whitelist edits.

+
- - - - - - - - - - - + + +
+

Word Rules

+
+

Comma separated terms or snippets. Spaces around commas are ignored.

+

+ Example: + primogem,events,orld,special program +

+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+

Regex Rules

+
+

One pattern per line. Matching is case-insensitive.

+
^New Release:.*
+\b(update|version|patch)\s+\d+\.\d+
+.*\[(important|notice)\].*
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + Back to feed +
+
-
+
- - - -
- +
+
+
+
{% include "_filter_preview.html" %}
+
+
- -
+
{% endblock content %} diff --git a/tests/test_blacklist.py b/tests/test_blacklist.py index 0c756ad..9e99518 100644 --- a/tests/test_blacklist.py +++ b/tests/test_blacklist.py @@ -11,6 +11,8 @@ from reader import make_reader from discord_rss_bot.filter.blacklist import entry_should_be_skipped from discord_rss_bot.filter.blacklist import feed_has_blacklist_tags +from discord_rss_bot.filter.evaluator import evaluate_entry_filters +from discord_rss_bot.filter.evaluator import get_filter_values_from_reader if TYPE_CHECKING: from collections.abc import Iterable @@ -203,3 +205,54 @@ def test_regex_should_be_skipped() -> None: ) reader.delete_tag(feed, "regex_blacklist_author") assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}" + + +def test_whitelist_match_overrides_blacklist_match() -> None: + """A whitelist hit should beat a blacklist hit in the final decision.""" + reader: Reader = get_reader() + + reader.add_feed(feed_url) + feed: Feed = reader.get_feed(feed_url) + reader.update_feeds() + + first_entry: list[Entry] = [] + entries: Iterable[Entry] = reader.get_entries(feed=feed) + for entry in entries: + first_entry.append(entry) + break + + assert len(first_entry) == 1, f"First entry should be added: {first_entry}" + + reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd") # pyright: ignore[reportArgumentType] + reader.set_tag(feed, "whitelist_title", "fvnnnfnfdnfdnfd") # pyright: ignore[reportArgumentType] + + decision = evaluate_entry_filters( + first_entry[0], + blacklist_values=get_filter_values_from_reader(reader, feed, "blacklist"), + whitelist_values=get_filter_values_from_reader(reader, feed, "whitelist"), + ) + + assert decision.should_send is True, "Whitelist match should override blacklist match" + assert decision.blacklist_match is not None, "Expected a blacklist match" + assert decision.whitelist_match is not None, "Expected a whitelist match" + assert "whitelist overrides blacklist" in decision.reason + + +def test_blacklist_substring_match_on_title() -> None: + """Blacklist plain-text rules should match title substrings.""" + reader: Reader = get_reader() + + reader.add_feed(feed_url) + feed: Feed = reader.get_feed(feed_url) + reader.update_feeds() + + first_entry: list[Entry] = [] + entries: Iterable[Entry] = reader.get_entries(feed=feed) + for entry in entries: + first_entry.append(entry) + break + + assert len(first_entry) == 1, f"First entry should be added: {first_entry}" + + reader.set_tag(feed, "blacklist_title", "vnnnfn") # pyright: ignore[reportArgumentType] + assert entry_should_be_skipped(reader, first_entry[0]) is True, "Substring title match should blacklist the entry" diff --git a/tests/test_main.py b/tests/test_main.py index d680a26..117422c 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -37,6 +37,15 @@ def encoded_feed_url(url: str) -> str: return urllib.parse.quote(feed_url) if url else "" +def ensure_preview_feed_exists() -> Reader: + reader: Reader = get_reader_dependency() + with contextlib.suppress(Exception): + reader.add_feed(feed_url) + with contextlib.suppress(Exception): + reader.update_feed(feed_url) + return reader + + def test_search() -> None: """Test the /search page.""" # Remove the feed if it already exists before we run the test. @@ -221,6 +230,221 @@ def test_get() -> None: assert response.status_code == 200, f"/whitelist failed: {response.text}" +def test_blacklist_page_uses_live_preview_layout() -> None: + ensure_preview_feed_exists() + + response: Response = client.get(url="/blacklist", params={"feed_url": encoded_feed_url(feed_url)}) + + assert response.status_code == 200, f"/blacklist failed: {response.text}" + assert 'hx-get="/blacklist_preview"' in response.text + assert 'id="filter-preview"' in response.text + assert "Blacklist Rules" in response.text + + +def test_whitelist_page_uses_live_preview_layout() -> None: + ensure_preview_feed_exists() + + response: Response = client.get(url="/whitelist", params={"feed_url": encoded_feed_url(feed_url)}) + + assert response.status_code == 200, f"/whitelist failed: {response.text}" + assert 'hx-get="/whitelist_preview"' in response.text + assert 'id="filter-preview"' in response.text + assert "Whitelist Rules" in response.text + + +def test_blacklist_preview_does_not_persist_unsaved_rules() -> None: + reader: Reader = ensure_preview_feed_exists() + reader.set_tag(feed_url, "blacklist_title", "saved-blacklist") # pyright: ignore[reportArgumentType] + + try: + response: Response = client.get( + url="/blacklist_preview", + params={ + "feed_url": feed_url, + "blacklist_title": "fvnnnfnfdnfdnfd", + }, + ) + + assert response.status_code == 200, f"/blacklist_preview failed: {response.text}" + assert "Live preview" in response.text + assert reader.get_tag(feed_url, "blacklist_title", "") == "saved-blacklist" + finally: + with contextlib.suppress(Exception): + reader.delete_tag(feed_url, "blacklist_title") + + +def test_whitelist_preview_shows_precedence_over_blacklist() -> None: + reader: Reader = ensure_preview_feed_exists() + reader.set_tag(feed_url, "blacklist_title", "fvnnnfnfdnfdnfd") # pyright: ignore[reportArgumentType] + + try: + response: Response = client.get( + url="/whitelist_preview", + params={ + "feed_url": feed_url, + "whitelist_title": "fvnnnfnfdnfdnfd", + }, + ) + + assert response.status_code == 200, f"/whitelist_preview failed: {response.text}" + assert "whitelist overrides blacklist" in response.text + assert "Sent" in response.text + finally: + with contextlib.suppress(Exception): + reader.delete_tag(feed_url, "blacklist_title") + + +def test_blacklist_preview_uses_50_entry_limit() -> None: + @dataclass(slots=True) + class DummyContent: + value: str + + @dataclass(slots=True) + class DummyFeed: + url: str + title: str + + @dataclass(slots=True) + class DummyEntry: + id: str + feed: DummyFeed + title: str + summary: str + author: str + link: str + published: datetime | None + content: list[DummyContent] = field(default_factory=lambda: [DummyContent("content")]) + + class StubReader: + def __init__(self) -> None: + self.feed = DummyFeed(url="https://example.com/filter-preview.xml", title="Preview Feed") + self.recorded_limit: int | None = None + self.entries: list[Entry] = [ + cast( + "Entry", + DummyEntry( + id=f"entry-{index}", + feed=self.feed, + title=f"Entry {index}", + summary=f"Summary {index}", + author="Author", + link=f"https://example.com/entry-{index}", + published=datetime(2024, 1, 1, tzinfo=UTC), + ), + ) + for index in range(60) + ] + + def get_feed(self, _feed_url: str) -> DummyFeed: + return self.feed + + def get_entries(self, **kwargs: object) -> list[Entry]: + limit = kwargs.get("limit") + self.recorded_limit = limit if isinstance(limit, int) else None + if isinstance(limit, int): + return self.entries[:limit] + return self.entries + + def get_tag(self, _resource: object, _key: str, default: object = None) -> object: + return default + + stub_reader = StubReader() + app.dependency_overrides[get_reader_dependency] = lambda: stub_reader + + try: + with patch("discord_rss_bot.main.create_html_for_feed", return_value="
Rendered
"): + response: Response = client.get( + url="/blacklist_preview", + params={"feed_url": stub_reader.feed.url}, + ) + + assert response.status_code == 200, f"/blacklist_preview failed: {response.text}" + assert stub_reader.recorded_limit == 50, ( + f"Expected preview to request 50 entries, got {stub_reader.recorded_limit}" + ) + assert "50 checked" in response.text + finally: + app.dependency_overrides = {} + + +def test_blacklist_preview_shows_labeled_field_values_for_substring_match() -> None: + @dataclass(slots=True) + class DummyContent: + value: str + + @dataclass(slots=True) + class DummyFeed: + url: str + title: str + + @dataclass(slots=True) + class DummyEntry: + id: str + feed: DummyFeed + title: str + summary: str + author: str + link: str + published: datetime | None + content: list[DummyContent] = field(default_factory=list) + + class StubReader: + def __init__(self) -> None: + self.feed = DummyFeed(url="https://example.com/wow.xml", title="Warcraft Feed") + self.entries: list[Entry] = [ + cast( + "Entry", + DummyEntry( + id="wow-1", + feed=self.feed, + title="World of Warcraft", + summary="

Massive MMO news update

", + author="Blizzard", + link="https://example.com/wow-1", + published=datetime(2024, 1, 1, tzinfo=UTC), + content=[DummyContent("

The expansion launches soon.

")], + ), + ), + ] + + def get_feed(self, _feed_url: str) -> DummyFeed: + return self.feed + + def get_entries(self, **_kwargs: object) -> list[Entry]: + return self.entries + + def get_tag(self, _resource: object, _key: str, default: object = None) -> object: + return default + + stub_reader = StubReader() + app.dependency_overrides[get_reader_dependency] = lambda: stub_reader + + try: + with patch("discord_rss_bot.main.create_html_for_feed", return_value="
Rendered
"): + response: Response = client.get( + url="/blacklist_preview", + params={ + "feed_url": stub_reader.feed.url, + "blacklist_title": "orld", + }, + ) + + assert response.status_code == 200, f"/blacklist_preview failed: {response.text}" + assert "Skipped" in response.text + assert "World of Warcraft" in response.text + assert "Title" in response.text + assert "Author" in response.text + assert "Description" in response.text + assert "Content" in response.text + assert "filter-preview__field-row" in response.text + assert "filter-preview__match" in response.text + assert 'orld' in response.text + assert "Massive MMO news update" in response.text + assert "The expansion launches soon." in response.text + finally: + app.dependency_overrides = {} + + def test_settings_page_shows_screenshot_layout_setting() -> None: response: Response = client.get(url="/settings") assert response.status_code == 200, f"/settings failed: {response.text}" diff --git a/tests/test_utils.py b/tests/test_utils.py index d4ee2ae..19f70e9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,6 +12,8 @@ def test_is_word_in_text() -> None: assert is_word_in_text("word1,word2", "This is a sample text containing word1.") is True, msg_true assert is_word_in_text("word1,word2", "This is a sample text containing word2.") is True, msg_true assert is_word_in_text("word1,word2", "This is a sample text containing WORD1 and WORD2.") is True, msg_true + assert is_word_in_text("orld", "World of Warcraft") is True, msg_true + assert is_word_in_text(" orld , craft ", "World of Warcraft") is True, msg_true assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false diff --git a/tests/test_whitelist.py b/tests/test_whitelist.py index 6e911fe..250dcfd 100644 --- a/tests/test_whitelist.py +++ b/tests/test_whitelist.py @@ -9,6 +9,8 @@ from reader import Feed from reader import Reader from reader import make_reader +from discord_rss_bot.filter.evaluator import evaluate_entry_filters +from discord_rss_bot.filter.evaluator import get_filter_values_from_reader from discord_rss_bot.filter.whitelist import has_white_tags from discord_rss_bot.filter.whitelist import should_be_sent @@ -184,3 +186,54 @@ def test_regex_should_be_sent() -> None: assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns" reader.delete_tag(feed, "regex_whitelist_author") assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent" + + +def test_active_whitelist_blocks_non_matching_blacklisted_entry() -> None: + """An active whitelist should block non-matching entries even if blacklist also matches.""" + reader: Reader = get_reader() + + reader.add_feed(feed_url) + feed: Feed = reader.get_feed(feed_url) + reader.update_feeds() + + first_entry: list[Entry] = [] + entries: Iterable[Entry] = reader.get_entries(feed=feed) + for entry in entries: + first_entry.append(entry) + break + + assert len(first_entry) == 1, "First entry should be added" + + reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd") # pyright: ignore[reportArgumentType] + reader.set_tag(feed, "whitelist_title", "does-not-match") # pyright: ignore[reportArgumentType] + + decision = evaluate_entry_filters( + first_entry[0], + blacklist_values=get_filter_values_from_reader(reader, feed, "blacklist"), + whitelist_values=get_filter_values_from_reader(reader, feed, "whitelist"), + ) + + assert decision.should_send is False, "Entry should be skipped when whitelist is active but does not match" + assert decision.blacklist_match is not None, "Expected a blacklist match" + assert decision.whitelist_match is None, "Expected whitelist to miss" + assert "no whitelist rule matched" in decision.reason + + +def test_whitelist_substring_match_on_title() -> None: + """Whitelist plain-text rules should match title substrings.""" + reader: Reader = get_reader() + + reader.add_feed(feed_url) + feed: Feed = reader.get_feed(feed_url) + reader.update_feeds() + + first_entry: list[Entry] = [] + entries: Iterable[Entry] = reader.get_entries(feed=feed) + for entry in entries: + first_entry.append(entry) + break + + assert len(first_entry) == 1, "First entry should be added" + + reader.set_tag(feed, "whitelist_title", "vnnnfn") # pyright: ignore[reportArgumentType] + assert should_be_sent(reader, first_entry[0]) is True, "Substring title match should whitelist the entry"