Add live preview to blacklist and whitelist
All checks were successful
Test and build Docker image / docker (push) Successful in 1m58s

This commit is contained in:
Joakim Hellsén 2026-04-27 18:27:05 +02:00
commit 6a3bba5b69
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
15 changed files with 1539 additions and 370 deletions

View file

@ -4,10 +4,7 @@ import urllib.parse
from functools import lru_cache from functools import lru_cache
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from discord_rss_bot.filter.blacklist import entry_should_be_skipped from discord_rss_bot.filter.evaluator import get_entry_filter_decision_from_reader
from discord_rss_bot.filter.blacklist import feed_has_blacklist_tags
from discord_rss_bot.filter.whitelist import has_white_tags
from discord_rss_bot.filter.whitelist import should_be_sent
if TYPE_CHECKING: if TYPE_CHECKING:
from reader import Entry from reader import Entry
@ -41,7 +38,7 @@ def entry_is_whitelisted(entry_to_check: Entry, reader: Reader) -> bool:
bool: True if the feed is whitelisted, False otherwise. bool: True if the feed is whitelisted, False otherwise.
""" """
return bool(has_white_tags(reader, entry_to_check.feed) and should_be_sent(reader, entry_to_check)) return get_entry_filter_decision_from_reader(reader, entry_to_check).whitelist_match is not None
def entry_is_blacklisted(entry_to_check: Entry, reader: Reader) -> bool: def entry_is_blacklisted(entry_to_check: Entry, reader: Reader) -> bool:
@ -55,6 +52,4 @@ def entry_is_blacklisted(entry_to_check: Entry, reader: Reader) -> bool:
bool: True if the feed is blacklisted, False otherwise. bool: True if the feed is blacklisted, False otherwise.
""" """
return bool( return get_entry_filter_decision_from_reader(reader, entry_to_check).blacklist_match is not None
feed_has_blacklist_tags(reader, entry_to_check.feed) and entry_should_be_skipped(reader, entry_to_check),
)

View file

@ -37,9 +37,7 @@ from discord_rss_bot.custom_message import CustomEmbed
from discord_rss_bot.custom_message import get_custom_message from discord_rss_bot.custom_message import get_custom_message
from discord_rss_bot.custom_message import replace_tags_in_embed from discord_rss_bot.custom_message import replace_tags_in_embed
from discord_rss_bot.custom_message import replace_tags_in_text_message from discord_rss_bot.custom_message import replace_tags_in_text_message
from discord_rss_bot.filter.blacklist import entry_should_be_skipped from discord_rss_bot.filter.evaluator import get_entry_filter_decision_from_reader
from discord_rss_bot.filter.whitelist import has_white_tags
from discord_rss_bot.filter.whitelist import should_be_sent
from discord_rss_bot.hoyolab_api import create_hoyolab_webhook from discord_rss_bot.hoyolab_api import create_hoyolab_webhook
from discord_rss_bot.hoyolab_api import extract_post_id_from_hoyolab_url from discord_rss_bot.hoyolab_api import extract_post_id_from_hoyolab_url
from discord_rss_bot.hoyolab_api import fetch_hoyolab_post from discord_rss_bot.hoyolab_api import fetch_hoyolab_post
@ -711,14 +709,9 @@ def send_to_discord(reader: Reader | None = None, feed: Feed | None = None, *, d
use_default_message_on_empty=True, use_default_message_on_empty=True,
) )
# Check if the entry is blacklisted, and if it is, we will skip it. decision = get_entry_filter_decision_from_reader(effective_reader, entry)
if entry_should_be_skipped(effective_reader, entry): if not decision.should_send:
logger.info("Entry was blacklisted: %s", entry.id) logger.info("Entry was skipped: %s (%s)", entry.id, decision.reason)
continue
# Check if the feed has a whitelist, and if it does, check if the entry is whitelisted.
if has_white_tags(effective_reader, entry.feed) and not should_be_sent(effective_reader, entry):
logger.info("Entry was not whitelisted: %s", entry.id)
continue continue
# Use a custom webhook for Hoyolab feeds. # Use a custom webhook for Hoyolab feeds.

View file

@ -2,8 +2,9 @@ from __future__ import annotations
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from discord_rss_bot.filter.utils import is_regex_match from discord_rss_bot.filter.evaluator import find_filter_match
from discord_rss_bot.filter.utils import is_word_in_text from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.filter.evaluator import has_filter_values
if TYPE_CHECKING: if TYPE_CHECKING:
from reader import Entry from reader import Entry
@ -31,29 +32,10 @@ def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool:
Returns: Returns:
bool: If the feed has any of the tags. bool: If the feed has any of the tags.
""" """
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip() return has_filter_values(get_filter_values_from_reader(reader, feed, "blacklist"))
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip()
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip()
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip()
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip()
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip()
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip()
return bool(
blacklist_title
or blacklist_author
or blacklist_content
or blacklist_summary
or regex_blacklist_author
or regex_blacklist_content
or regex_blacklist_summary
or regex_blacklist_title,
)
def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911 def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool:
"""Return True if the entry is in the blacklist. """Return True if the entry is in the blacklist.
Args: Args:
@ -63,58 +45,4 @@ def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0
Returns: Returns:
bool: If the entry is in the blacklist. bool: If the entry is in the blacklist.
""" """
feed = entry.feed return bool(find_filter_match(entry, get_filter_values_from_reader(reader, entry.feed, "blacklist"), "blacklist"))
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip()
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip()
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip()
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip()
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip()
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip()
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip()
# TODO(TheLovinator): Also add support for entry_text and more.
# Check regular blacklist
if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title):
return True
if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary):
return True
if (
entry.content
and entry.content[0].value
and blacklist_content
and is_word_in_text(blacklist_content, entry.content[0].value)
):
return True
if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author):
return True
if (
entry.content
and entry.content[0].value
and blacklist_content
and is_word_in_text(blacklist_content, entry.content[0].value)
):
return True
# Check regex blacklist
if entry.title and regex_blacklist_title and is_regex_match(regex_blacklist_title, entry.title):
return True
if entry.summary and regex_blacklist_summary and is_regex_match(regex_blacklist_summary, entry.summary):
return True
if (
entry.content
and entry.content[0].value
and regex_blacklist_content
and is_regex_match(regex_blacklist_content, entry.content[0].value)
):
return True
if entry.author and regex_blacklist_author and is_regex_match(regex_blacklist_author, entry.author):
return True
return bool(
entry.content
and entry.content[0].value
and regex_blacklist_content
and is_regex_match(regex_blacklist_content, entry.content[0].value),
)

View file

@ -0,0 +1,271 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
from discord_rss_bot.filter.utils import is_regex_match
from discord_rss_bot.filter.utils import is_word_in_text
if TYPE_CHECKING:
from collections.abc import Mapping
from reader import Entry
from reader import Feed
from reader import Reader
# Entry fields that filter rules can target. The helpers in this module iterate
# this tuple in order, so its order also decides which rule is reported first
# when several fields match.
FILTER_FIELDS: tuple[str, str, str, str] = ("title", "summary", "content", "author")
# Filter settings keyed by "<field>" for comma-separated text rules and by
# "regex_<field>" for regex rules, where <field> is one of FILTER_FIELDS.
FilterValues = dict[str, str]
@dataclass(frozen=True, slots=True)
class FilterMatch:
filter_name: str
field_name: str
match_type: str
pattern: str
@property
def description(self) -> str:
field_label: str = self.field_name.replace("_", " ")
return f"{self.filter_name} {self.match_type} match on {field_label}"
@dataclass(frozen=True, slots=True)
class EntryFilterDecision:
should_send: bool
reason: str
blacklist_match: FilterMatch | None
whitelist_match: FilterMatch | None
has_blacklist_filters: bool
has_whitelist_filters: bool
def get_filter_values_from_reader(reader: Reader, feed: Feed, filter_name: str) -> FilterValues:
    """Load the saved filter tags of a feed as stripped strings.

    For every field in ``FILTER_FIELDS`` the tags ``<filter_name>_<field>`` and
    ``regex_<filter_name>_<field>`` are read with an empty-string default,
    converted with ``str()`` and stripped.

    Args:
        reader: The reader instance the tags are read from.
        feed: The feed whose filter tags should be loaded.
        filter_name: Either blacklist or whitelist.

    Returns:
        FilterValues: Values keyed by ``<field>`` and ``regex_<field>``.
    """

    def saved_value(tag_name: str) -> str:
        # Tags may hold non-string values, so normalise to a stripped string.
        return str(reader.get_tag(feed, tag_name, "")).strip()

    loaded: FilterValues = {}
    for field in FILTER_FIELDS:
        loaded.update(
            {
                field: saved_value(f"{filter_name}_{field}"),
                f"regex_{field}": saved_value(f"regex_{filter_name}_{field}"),
            },
        )
    return loaded
def coerce_filter_values(filter_name: str, values: Mapping[str, str] | None = None) -> FilterValues:
    """Normalize raw filter values into the ``<field>`` / ``regex_<field>`` layout.

    Each value may be supplied under its prefixed form-style key
    (``<filter_name>_<field>`` / ``regex_<filter_name>_<field>``) or under its
    bare key (``<field>`` / ``regex_<field>``). The prefixed key wins when
    present. Missing values become empty strings and every value is stripped.

    Args:
        filter_name: Either blacklist or whitelist.
        values: Optional raw mapping of form or saved values.

    Returns:
        FilterValues: A normalized value mapping covering every filter field.
    """
    raw: Mapping[str, str] = values or {}

    def pick(prefixed_key: str, bare_key: str) -> str:
        # The bare key is only a fallback for a missing prefixed key.
        fallback = raw.get(bare_key, "")
        return str(raw.get(prefixed_key, fallback)).strip()

    cleaned: FilterValues = {}
    for field in FILTER_FIELDS:
        regex_key = f"regex_{field}"
        cleaned[field] = pick(f"{filter_name}_{field}", field)
        cleaned[regex_key] = pick(f"regex_{filter_name}_{field}", regex_key)
    return cleaned
def has_filter_values(values: Mapping[str, str]) -> bool:
    """Tell whether a filter mapping contains any configured rule.

    A value counts as configured when its string form is non-empty after
    stripping surrounding whitespace.

    Args:
        values: Filter values to inspect.

    Returns:
        bool: True when at least one value is non-empty, otherwise False.
    """
    for raw_value in values.values():
        if str(raw_value).strip():
            return True
    return False
def get_entry_filter_decision_from_reader(reader: Reader, entry: Entry) -> EntryFilterDecision:
    """Decide whether an entry should be sent, using the tags saved on its feed.

    Loads the feed's blacklist and whitelist values from the reader and passes
    them to ``evaluate_entry_filters``.

    Args:
        reader: The reader instance.
        entry: The entry to evaluate.

    Returns:
        EntryFilterDecision: Final decision plus match details.
    """
    saved_blacklist: FilterValues = get_filter_values_from_reader(reader, entry.feed, "blacklist")
    saved_whitelist: FilterValues = get_filter_values_from_reader(reader, entry.feed, "whitelist")
    return evaluate_entry_filters(
        entry,
        blacklist_values=saved_blacklist,
        whitelist_values=saved_whitelist,
    )
def evaluate_entry_filters(
    entry: Entry,
    *,
    blacklist_values: Mapping[str, str] | None = None,
    whitelist_values: Mapping[str, str] | None = None,
) -> EntryFilterDecision:
    """Evaluate one entry against blacklist and whitelist settings.

    Decision order:

    1. A whitelist match always sends the entry, even if a blacklist rule
       also matched.
    2. If whitelist rules are configured but none matched, the entry is skipped.
    3. Otherwise a blacklist match skips the entry.
    4. Otherwise the entry is sent.

    Args:
        entry: The entry to evaluate.
        blacklist_values: Blacklist values from saved tags or a form.
        whitelist_values: Whitelist values from saved tags or a form.

    Returns:
        EntryFilterDecision: Final decision plus match details.
    """
    blacklist_rules: FilterValues = coerce_filter_values("blacklist", blacklist_values)
    whitelist_rules: FilterValues = coerce_filter_values("whitelist", whitelist_values)

    blacklist_match: FilterMatch | None = find_filter_match(entry, blacklist_rules, "blacklist")
    whitelist_match: FilterMatch | None = find_filter_match(entry, whitelist_rules, "whitelist")
    blacklist_configured: bool = has_filter_values(blacklist_rules)
    whitelist_configured: bool = has_filter_values(whitelist_rules)

    verdict: bool
    reason: str
    if whitelist_match:
        # A whitelist hit wins regardless of the blacklist outcome.
        verdict = True
        if blacklist_match:
            reason = f"Sent because {whitelist_match.description}; whitelist overrides blacklist."
        else:
            reason = f"Sent because {whitelist_match.description}."
    elif whitelist_configured:
        # A configured whitelist that did not match blocks the entry.
        verdict = False
        if blacklist_match:
            reason = f"Skipped because {blacklist_match.description} and no whitelist rule matched."
        else:
            reason = "Skipped because no whitelist rule matched."
    elif blacklist_match:
        verdict = False
        reason = f"Skipped because {blacklist_match.description}."
    else:
        verdict = True
        reason = "Sent because no active filter blocked it."

    return EntryFilterDecision(
        should_send=verdict,
        reason=reason,
        blacklist_match=blacklist_match,
        whitelist_match=whitelist_match,
        has_blacklist_filters=blacklist_configured,
        has_whitelist_filters=whitelist_configured,
    )
def find_filter_match(entry: Entry, values: Mapping[str, str], filter_name: str) -> FilterMatch | None:
    """Find the first filter rule that matches an entry.

    Text rules are tried for every field in ``FILTER_FIELDS`` order before any
    regex rule is tried. A rule is skipped when its pattern, or the entry
    field it targets, is empty.

    Args:
        entry: The entry to evaluate.
        values: Normalized filter values.
        filter_name: Either blacklist or whitelist.

    Returns:
        FilterMatch | None: The first matching rule, if any.
    """
    entry_fields: dict[str, str] = get_entry_fields(entry)

    # (match type, key prefix inside ``values``, matcher), in the order they are tried.
    strategies = (
        ("text", "", is_word_in_text),
        ("regex", "regex_", is_regex_match),
    )
    for match_type, key_prefix, matcher in strategies:
        for field_name in FILTER_FIELDS:
            pattern: str = str(values.get(f"{key_prefix}{field_name}", "")).strip()
            field_text: str = entry_fields[field_name]
            if not (pattern and field_text):
                continue
            if matcher(pattern, field_text):
                return FilterMatch(
                    filter_name=filter_name,
                    field_name=field_name,
                    match_type=match_type,
                    pattern=pattern,
                )
    return None
def get_entry_fields(entry: Entry) -> dict[str, str]:
    """Collect the entry text that filters are matched against.

    Missing or empty attributes are returned as empty strings. Only the value
    of the first content item is used for the content field.

    Args:
        entry: The entry to inspect.

    Returns:
        dict[str, str]: Text for the title, summary, content and author fields.
    """
    first_content_value = entry.content[0].value if entry.content else ""
    return {
        "title": entry.title or "",
        "summary": entry.summary or "",
        "content": first_content_value or "",
        "author": entry.author or "",
    }
def get_entry_decision_key(entry: Entry) -> str:
    """Build the lookup key used to map preview decisions to entries.

    Args:
        entry: The entry to key.

    Returns:
        str: The feed URL and the entry id joined by ``|``.
    """
    feed_url = entry.feed.url
    entry_id = entry.id
    return f"{feed_url}|{entry_id}"

View file

@ -7,22 +7,22 @@ logger: logging.Logger = logging.getLogger(__name__)
def is_word_in_text(word_string: str, text: str) -> bool: def is_word_in_text(word_string: str, text: str) -> bool:
"""Check if any of the words are in the text. """Check if any comma-separated terms are in the text.
Args: Args:
word_string: A comma-separated string of words to search for. word_string: A comma-separated string of terms to search for.
text: The text to search in. text: The text to search in.
Returns: Returns:
bool: True if any word is found in the text, otherwise False. bool: True if any term is found in the text, otherwise False.
""" """
word_list: list[str] = word_string.split(",") if not word_string or not text:
return False
# Compile regex patterns for each word. normalized_text: str = text.casefold()
patterns: list[re.Pattern[str]] = [re.compile(rf"(^|[^\w]){word}([^\w]|$)", re.IGNORECASE) for word in word_list] terms: list[str] = [term.strip().casefold() for term in word_string.split(",") if term.strip()]
# Check if any pattern matches the text. return any(term in normalized_text for term in terms)
return any(pattern.search(text) for pattern in patterns)
def is_regex_match(regex_string: str, text: str) -> bool: def is_regex_match(regex_string: str, text: str) -> bool:

View file

@ -2,8 +2,9 @@ from __future__ import annotations
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from discord_rss_bot.filter.utils import is_regex_match from discord_rss_bot.filter.evaluator import find_filter_match
from discord_rss_bot.filter.utils import is_word_in_text from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.filter.evaluator import has_filter_values
if TYPE_CHECKING: if TYPE_CHECKING:
from reader import Entry from reader import Entry
@ -31,29 +32,10 @@ def has_white_tags(reader: Reader, feed: Feed) -> bool:
Returns: Returns:
bool: If the feed has any of the tags. bool: If the feed has any of the tags.
""" """
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip() return has_filter_values(get_filter_values_from_reader(reader, feed, "whitelist"))
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip()
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip()
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip()
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip()
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip()
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip()
return bool(
whitelist_title
or whitelist_author
or whitelist_content
or whitelist_summary
or regex_whitelist_author
or regex_whitelist_content
or regex_whitelist_summary
or regex_whitelist_title,
)
def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911 def should_be_sent(reader: Reader, entry: Entry) -> bool:
"""Return True if the entry is in the whitelist. """Return True if the entry is in the whitelist.
Args: Args:
@ -63,44 +45,4 @@ def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
Returns: Returns:
bool: If the entry is in the whitelist. bool: If the entry is in the whitelist.
""" """
feed: Feed = entry.feed return bool(find_filter_match(entry, get_filter_values_from_reader(reader, entry.feed, "whitelist"), "whitelist"))
# Regular whitelist tags
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip()
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip()
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip()
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip()
# Regex whitelist tags
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip()
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip()
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip()
# Check regular whitelist
if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title):
return True
if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary):
return True
if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author):
return True
if (
entry.content
and entry.content[0].value
and whitelist_content
and is_word_in_text(whitelist_content, entry.content[0].value)
):
return True
# Check regex whitelist
if entry.title and regex_whitelist_title and is_regex_match(regex_whitelist_title, entry.title):
return True
if entry.summary and regex_whitelist_summary and is_regex_match(regex_whitelist_summary, entry.summary):
return True
if entry.author and regex_whitelist_author and is_regex_match(regex_whitelist_author, entry.author):
return True
return bool(
entry.content
and entry.content[0].value
and regex_whitelist_content
and is_regex_match(regex_whitelist_content, entry.content[0].value),
)

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import json import json
import logging import logging
import logging.config import logging.config
import re
import typing import typing
import urllib.parse import urllib.parse
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
@ -10,6 +11,8 @@ from dataclasses import dataclass
from datetime import UTC from datetime import UTC
from datetime import datetime from datetime import datetime
from functools import lru_cache from functools import lru_cache
from html import escape
from html import unescape
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import Annotated from typing import Annotated
from typing import Any from typing import Any
@ -54,6 +57,14 @@ from discord_rss_bot.feeds import get_feed_delivery_mode
from discord_rss_bot.feeds import get_screenshot_layout from discord_rss_bot.feeds import get_screenshot_layout
from discord_rss_bot.feeds import send_entry_to_discord from discord_rss_bot.feeds import send_entry_to_discord
from discord_rss_bot.feeds import send_to_discord from discord_rss_bot.feeds import send_to_discord
from discord_rss_bot.filter.evaluator import FILTER_FIELDS
from discord_rss_bot.filter.evaluator import EntryFilterDecision
from discord_rss_bot.filter.evaluator import FilterMatch
from discord_rss_bot.filter.evaluator import coerce_filter_values
from discord_rss_bot.filter.evaluator import evaluate_entry_filters
from discord_rss_bot.filter.evaluator import get_entry_decision_key
from discord_rss_bot.filter.evaluator import get_entry_fields
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.git_backup import commit_state_change from discord_rss_bot.git_backup import commit_state_change
from discord_rss_bot.git_backup import get_backup_path from discord_rss_bot.git_backup import get_backup_path
from discord_rss_bot.is_url_valid import is_url_valid from discord_rss_bot.is_url_valid import is_url_valid
@ -125,6 +136,15 @@ def has_webhooks() -> bool:
SECONDS_PER_MINUTE = 60 SECONDS_PER_MINUTE = 60
SECONDS_PER_HOUR = 3600 SECONDS_PER_HOUR = 3600
SECONDS_PER_DAY = 86400 SECONDS_PER_DAY = 86400
# Maximum number of entries requested from the reader when building a filter preview.
FILTER_PREVIEW_LIMIT = 50
# Display labels for the filterable entry fields; note that "summary" is labelled "Description".
PREVIEW_FIELD_LABELS: dict[str, str] = {
"title": "Title",
"author": "Author",
"summary": "Description",
"content": "Content",
}
# Matches one "<...>" sequence.
# NOTE(review): presumably used to strip HTML tags from preview text; the usage is not visible here, confirm.
PREVIEW_HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
# Matches a run of one or more whitespace characters.
# NOTE(review): presumably used to collapse whitespace in preview text; confirm against the caller.
PREVIEW_WHITESPACE_PATTERN = re.compile(r"\s+")
def relative_time(dt: datetime | None) -> str: def relative_time(dt: datetime | None) -> str:
@ -459,19 +479,50 @@ async def get_whitelist(
""" """
clean_feed_url: str = feed_url.strip() clean_feed_url: str = feed_url.strip()
feed: Feed = reader.get_feed(urllib.parse.unquote(clean_feed_url)) feed: Feed = reader.get_feed(urllib.parse.unquote(clean_feed_url))
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", ""))
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", ""))
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", ""))
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", ""))
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", ""))
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", ""))
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", ""))
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", ""))
context = { context = {
"request": request, "request": request,
"feed": feed, "feed": feed,
**build_filter_form_context("whitelist", get_filter_values_from_reader(reader, feed, "whitelist")),
**build_filter_preview_context(reader, feed, "whitelist"),
}
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
@app.get("/whitelist_preview", response_class=HTMLResponse)
async def get_whitelist_preview(
feed_url: str,
request: Request,
reader: Annotated[Reader, Depends(get_reader_dependency)],
whitelist_title: str = "",
whitelist_summary: str = "",
whitelist_content: str = "",
whitelist_author: str = "",
regex_whitelist_title: str = "",
regex_whitelist_summary: str = "",
regex_whitelist_content: str = "",
regex_whitelist_author: str = "",
) -> HTMLResponse:
"""Render the whitelist preview fragment for HTMX updates.
Args:
feed_url: Feed URL whose entries should be previewed.
request: The request object.
reader: The Reader instance.
whitelist_title: Word-based title whitelist.
whitelist_summary: Word-based summary whitelist.
whitelist_content: Word-based content whitelist.
whitelist_author: Word-based author whitelist.
regex_whitelist_title: Regex title whitelist.
regex_whitelist_summary: Regex summary whitelist.
regex_whitelist_content: Regex content whitelist.
regex_whitelist_author: Regex author whitelist.
Returns:
HTMLResponse: Rendered filter preview fragment.
"""
clean_feed_url: str = urllib.parse.unquote(feed_url.strip())
feed: Feed = reader.get_feed(clean_feed_url)
form_values: dict[str, str] = {
"whitelist_title": whitelist_title, "whitelist_title": whitelist_title,
"whitelist_summary": whitelist_summary, "whitelist_summary": whitelist_summary,
"whitelist_content": whitelist_content, "whitelist_content": whitelist_content,
@ -481,7 +532,16 @@ async def get_whitelist(
"regex_whitelist_content": regex_whitelist_content, "regex_whitelist_content": regex_whitelist_content,
"regex_whitelist_author": regex_whitelist_author, "regex_whitelist_author": regex_whitelist_author,
} }
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
return templates.TemplateResponse(
request=request,
name="_filter_preview.html",
context={
"request": request,
"feed": feed,
**build_filter_preview_context(reader, feed, "whitelist", form_values=form_values),
},
)
@app.post("/blacklist") @app.post("/blacklist")
@ -548,18 +608,50 @@ async def get_blacklist(
""" """
feed: Feed = reader.get_feed(urllib.parse.unquote(feed_url)) feed: Feed = reader.get_feed(urllib.parse.unquote(feed_url))
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", ""))
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", ""))
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", ""))
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", ""))
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", ""))
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", ""))
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", ""))
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", ""))
context = { context = {
"request": request, "request": request,
"feed": feed, "feed": feed,
**build_filter_form_context("blacklist", get_filter_values_from_reader(reader, feed, "blacklist")),
**build_filter_preview_context(reader, feed, "blacklist"),
}
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
@app.get("/blacklist_preview", response_class=HTMLResponse)
async def get_blacklist_preview(
feed_url: str,
request: Request,
reader: Annotated[Reader, Depends(get_reader_dependency)],
blacklist_title: str = "",
blacklist_summary: str = "",
blacklist_content: str = "",
blacklist_author: str = "",
regex_blacklist_title: str = "",
regex_blacklist_summary: str = "",
regex_blacklist_content: str = "",
regex_blacklist_author: str = "",
) -> HTMLResponse:
"""Render the blacklist preview fragment for HTMX updates.
Args:
feed_url: Feed URL whose entries should be previewed.
request: The request object.
reader: The Reader instance.
blacklist_title: Word-based title blacklist.
blacklist_summary: Word-based summary blacklist.
blacklist_content: Word-based content blacklist.
blacklist_author: Word-based author blacklist.
regex_blacklist_title: Regex title blacklist.
regex_blacklist_summary: Regex summary blacklist.
regex_blacklist_content: Regex content blacklist.
regex_blacklist_author: Regex author blacklist.
Returns:
HTMLResponse: Rendered filter preview fragment.
"""
clean_feed_url: str = urllib.parse.unquote(feed_url.strip())
feed: Feed = reader.get_feed(clean_feed_url)
form_values: dict[str, str] = {
"blacklist_title": blacklist_title, "blacklist_title": blacklist_title,
"blacklist_summary": blacklist_summary, "blacklist_summary": blacklist_summary,
"blacklist_content": blacklist_content, "blacklist_content": blacklist_content,
@ -569,7 +661,347 @@ async def get_blacklist(
"regex_blacklist_content": regex_blacklist_content, "regex_blacklist_content": regex_blacklist_content,
"regex_blacklist_author": regex_blacklist_author, "regex_blacklist_author": regex_blacklist_author,
} }
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
return templates.TemplateResponse(
request=request,
name="_filter_preview.html",
context={
"request": request,
"feed": feed,
**build_filter_preview_context(reader, feed, "blacklist", form_values=form_values),
},
)
def build_filter_form_context(filter_name: str, values: dict[str, str]) -> dict[str, str]:
    """Map normalized filter values back onto the form's field names.

    Values keyed by ``<field>`` / ``regex_<field>`` are re-keyed to
    ``<filter_name>_<field>`` / ``regex_<filter_name>_<field>``.

    Args:
        filter_name: Either blacklist or whitelist.
        values: Normalized filter values.

    Returns:
        dict[str, str]: Template keys matching current form field names.
    """
    form_context: dict[str, str] = {}
    for field in FILTER_FIELDS:
        regex_key = f"regex_{field}"
        form_context.update(
            {
                f"{filter_name}_{field}": values[field],
                f"regex_{filter_name}_{field}": values[regex_key],
            },
        )
    return form_context
def build_filter_preview_context(
    reader: Reader,
    feed: Feed,
    filter_name: str,
    form_values: dict[str, str] | None = None,
) -> dict[str, Any]:
    """Assemble the template context for the live filter preview.

    The filter being edited is evaluated with the (possibly unsaved) form values,
    while the other filter keeps the values stored on the feed.

    Args:
        reader: The Reader instance.
        feed: The feed being previewed.
        filter_name: Either blacklist or whitelist. Any value other than
            "blacklist" is handled as a whitelist preview.
        form_values: Optional unsaved values from the current form.

    Returns:
        dict[str, Any]: Preview context for template rendering (rows, rendered
            HTML, summary counters and helper text).
    """
    # Both saved rule sets are loaded up front; only the edited one is swapped out below.
    blacklist_rules: dict[str, str] = get_filter_values_from_reader(reader, feed, "blacklist")
    whitelist_rules: dict[str, str] = get_filter_values_from_reader(reader, feed, "whitelist")

    if filter_name == "blacklist":
        blacklist_rules = coerce_filter_values("blacklist", form_values)
        helper_text: str = "Saved whitelist rules still apply while previewing blacklist changes."
    else:
        whitelist_rules = coerce_filter_values("whitelist", form_values)
        helper_text = "Saved blacklist rules still apply while previewing whitelist changes."

    entries: list[Entry] = list(reader.get_entries(feed=feed, limit=FILTER_PREVIEW_LIMIT))
    rows: list[dict[str, Any]] = []
    decisions_by_key: dict[str, EntryFilterDecision] = {}

    for entry in entries:
        decision: EntryFilterDecision = evaluate_entry_filters(
            entry,
            blacklist_values=blacklist_rules,
            whitelist_values=whitelist_rules,
        )
        decisions_by_key[get_entry_decision_key(entry)] = decision

        if decision.should_send:
            status_label, status_class = "Sent", "success"
        else:
            status_label, status_class = "Skipped", "danger"

        published_label: str = entry.published.strftime("%Y-%m-%d %H:%M:%S") if entry.published else "Unknown date"

        rows.append(
            {
                "entry": entry,
                "decision": decision,
                "field_rows": build_preview_field_rows(entry, decision),
                "published_label": published_label,
                "status_label": status_label,
                "status_class": status_class,
            },
        )

    # Summary counters are derived from the collected decisions in one place.
    decisions: list[EntryFilterDecision] = [row["decision"] for row in rows]
    sent_total: int = sum(1 for item in decisions if item.should_send)
    skipped_total: int = len(decisions) - sent_total
    blacklist_total: int = sum(1 for item in decisions if item.blacklist_match)
    whitelist_total: int = sum(1 for item in decisions if item.whitelist_match)

    rendered_html: str = create_html_for_feed(
        reader=reader,
        entries=entries,
        current_feed_url=feed.url,
        entry_decisions=decisions_by_key,
    )

    return {
        "filter_name": filter_name,
        "filter_label": filter_name.title(),
        "preview_entries": entries,
        "preview_rows": rows,
        "preview_html": rendered_html,
        "preview_limit": FILTER_PREVIEW_LIMIT,
        "preview_summary": {
            "total": len(entries),
            "sent": sent_total,
            "skipped": skipped_total,
            "blacklist_matches": blacklist_total,
            "whitelist_matches": whitelist_total,
        },
        "preview_helper_text": helper_text,
    }
def build_preview_field_rows(entry: Entry, decision: EntryFilterDecision) -> list[dict[str, Any]]:
    """Produce one labeled row per entry field for the filter preview table.

    Args:
        entry: Entry whose values should be shown.
        decision: The final decision for the entry.

    Returns:
        list[dict[str, Any]]: Rows for title, author, summary and content, each with
            a label, the formatted value HTML and the badges of the filters that
            matched on that field.
    """
    values_by_field: dict[str, str] = get_entry_fields(entry)

    # Blacklist comes first so its badge and highlight candidate precede the whitelist ones.
    candidates = (
        (decision.blacklist_match, "Blacklist match", "danger"),
        (decision.whitelist_match, "Whitelist match", "success"),
    )

    rows: list[dict[str, Any]] = []
    for name in ("title", "author", "summary", "content"):
        hits = [
            (filter_match, badge_label, css_class)
            for filter_match, badge_label, css_class in candidates
            if filter_match and filter_match.field_name == name
        ]
        highlight_inputs: list[tuple[FilterMatch, str]] = [
            (filter_match, css_class) for filter_match, _, css_class in hits
        ]
        rows.append(
            {
                "label": PREVIEW_FIELD_LABELS[name],
                "value_html": format_preview_field_value(values_by_field[name], highlight_inputs),
                "badges": [{"label": badge_label, "class": css_class} for _, badge_label, css_class in hits],
            },
        )
    return rows
def format_preview_field_value(
    value: str,
    matches: list[tuple[FilterMatch, str]],
    max_length: int = 280,
) -> str:
    """Render an entry field as escaped preview HTML with the matched text marked.

    Args:
        value: Raw field value from the entry.
        matches: Matching filters for this field and their display classes.
        max_length: Max number of characters to display.

    Returns:
        str: "No value" when the normalized field is empty; otherwise the clipped,
            escaped text, with the highlighted span wrapped in a ``<mark>`` element
            when a match span was found.
    """
    plain_value: str = normalize_preview_field_value(value)
    if not plain_value:
        return "No value"

    found_span, css_class = get_preview_highlight_span(plain_value, matches)
    shown_text, shown_span = clip_preview_value(plain_value, found_span, max_length)

    if shown_span is not None and css_class is not None:
        lo, hi = shown_span
        mark_open = f'<mark class="filter-preview__match filter-preview__match--{css_class}">'
        # Each text segment is escaped on its own; only the <mark> wrapper is raw markup.
        pieces = (
            escape(shown_text[:lo]),
            mark_open,
            escape(shown_text[lo:hi]),
            "</mark>",
            escape(shown_text[hi:]),
        )
        return "".join(pieces)

    return escape(shown_text)
def normalize_preview_field_value(value: str) -> str:
    """Reduce raw entry field content to single-spaced plain text.

    Args:
        value: Raw field value.

    Returns:
        str: An empty string for an empty value; otherwise the value with every
            PREVIEW_HTML_TAG_PATTERN match replaced by a space, entities unescaped,
            every PREVIEW_WHITESPACE_PATTERN match replaced by a single space, and
            surrounding whitespace stripped.
    """
    if value:
        without_tags: str = PREVIEW_HTML_TAG_PATTERN.sub(" ", value)
        decoded: str = unescape(without_tags)
        return PREVIEW_WHITESPACE_PATTERN.sub(" ", decoded).strip()
    return ""
def get_preview_highlight_span(
    value: str,
    matches: list[tuple[FilterMatch, str]],
) -> tuple[tuple[int, int] | None, str | None]:
    """Pick the match that starts earliest in the field and return its span and class.

    Args:
        value: Normalized field value.
        matches: Matching filters and associated preview classes.

    Returns:
        tuple[tuple[int, int] | None, str | None]: Span and highlight class of the
            earliest match, or ``(None, None)`` when no match can be located.
    """
    located: list[tuple[tuple[int, int], str]] = []
    for filter_match, css_class in matches:
        span = get_filter_match_span(value, filter_match)
        if span is not None:
            located.append((span, css_class))

    if not located:
        return None, None

    # min() keeps the first of several equal start offsets, so earlier matches win ties.
    return min(located, key=lambda item: item[0][0])
def get_filter_match_span(value: str, match: FilterMatch) -> tuple[int, int] | None:
    """Locate the text that a filter match refers to inside a preview value.

    Args:
        value: Normalized preview value.
        match: Matching filter metadata.

    Returns:
        tuple[int, int] | None: The first matching span if found.
    """
    # Regex filters use the regex locator; every other match type is treated as plain text.
    locate = get_regex_match_span if match.match_type == "regex" else get_text_match_span
    return locate(value, match.pattern)
def get_text_match_span(value: str, pattern: str) -> tuple[int, int] | None:
    """Return the earliest case-insensitive substring span for comma-separated text terms.

    Args:
        value: Text to search in.
        pattern: Comma-separated terms; surrounding whitespace and empty terms are ignored.

    Returns:
        tuple[int, int] | None: Span of the term occurrence that starts first, or
            None when no term occurs in the value.
    """
    terms = (piece.strip() for piece in pattern.split(","))
    # Terms are escaped so they are matched literally, never as regex syntax.
    hits = (re.search(re.escape(term), value, re.IGNORECASE) for term in terms if term)
    spans = [hit.span() for hit in hits if hit]
    # On equal start offsets min() keeps the first one, i.e. the term listed first.
    return min(spans, key=lambda span: span[0], default=None)
def get_regex_match_span(value: str, pattern: str) -> tuple[int, int] | None:
    """Return the earliest non-empty regex match span for newline/comma-separated patterns.

    Args:
        value: Text to search in.
        pattern: Raw regex filter text; it is split into individual patterns with
            split_regex_patterns.

    Returns:
        tuple[int, int] | None: Span of the non-empty match that starts first across
            all patterns, or None when nothing highlightable is found.
    """
    earliest_span: tuple[int, int] | None = None
    for pattern_str in split_regex_patterns(pattern):
        try:
            compiled_pattern = re.compile(pattern_str, re.IGNORECASE)
        except re.error:
            # An invalid pattern cannot be highlighted; the remaining patterns are still tried.
            continue

        # search() only reports the first match, which may be empty (e.g. "x*" at
        # offset 0). Scan on so a later non-empty match is still highlighted.
        current_span = next(
            (found.span() for found in compiled_pattern.finditer(value) if found.start() != found.end()),
            None,
        )
        if current_span is None:
            continue
        if earliest_span is None or current_span[0] < earliest_span[0]:
            earliest_span = current_span
    return earliest_span
def split_regex_patterns(pattern: str) -> list[str]:
    """Break raw regex filter text into individual patterns.

    Patterns are separated by newlines and, within a line, by commas. Surrounding
    whitespace is removed and empty pieces are dropped.

    Args:
        pattern: The raw regex pattern string.

    Returns:
        list[str]: A list of individual regex patterns.
    """
    # Splitting a comma-free line on "," yields the line itself, so one expression
    # covers both the single-pattern and the comma-separated case.
    return [
        piece.strip()
        for line in pattern.split("\n")
        for piece in line.split(",")
        if piece.strip()
    ]
def clip_preview_value(
    value: str,
    highlight_span: tuple[int, int] | None,
    max_length: int,
) -> tuple[str, tuple[int, int] | None]:
    """Clip a preview value while keeping the highlighted match visible when possible.

    Text that is cut off is marked with a one-character ellipsis on the clipped
    side(s). The returned span always indexes into the returned string.

    Args:
        value: The normalized preview value.
        highlight_span: The span of the highlighted match within the value.
        max_length: The maximum length of the clipped value (excluding the
            ellipsis markers added around a highlighted window).

    Returns:
        tuple[str, tuple[int, int] | None]: The clipped preview value and the
            highlight span adjusted to it (None when there is no highlight).
    """
    ellipsis_char = "\u2026"

    if len(value) <= max_length:
        return value, highlight_span

    if highlight_span is None:
        # One character of the budget is reserved for the trailing ellipsis.
        return f"{value[: max_length - 1].rstrip()}{ellipsis_char}", None

    match_start, match_end = highlight_span
    # Keep roughly a third of the window as leading context before the match.
    window_start = max(0, match_start - (max_length // 3))
    window_end = min(len(value), window_start + max_length)
    if match_end > window_end:
        # The match runs past the window: re-anchor the window on the match end.
        window_end = min(len(value), match_end + (max_length // 3))
        window_start = max(0, window_end - max_length)

    clipped_value = value[window_start:window_end]
    # Clamp to the window so a match longer than max_length cannot produce
    # negative or out-of-range offsets.
    span_start = max(0, match_start - window_start)
    span_end = min(len(clipped_value), match_end - window_start)

    if window_start > 0:
        clipped_value = f"{ellipsis_char}{clipped_value}"
        # The leading ellipsis shifts every offset by one character.
        span_start += 1
        span_end += 1
    if window_end < len(value):
        clipped_value = f"{clipped_value}{ellipsis_char}"

    return clipped_value, (span_start, span_end)
@app.post("/custom") @app.post("/custom")
@ -1239,6 +1671,7 @@ def create_html_for_feed( # noqa: C901, PLR0914
reader: Reader, reader: Reader,
entries: Iterable[Entry], entries: Iterable[Entry],
current_feed_url: str = "", current_feed_url: str = "",
entry_decisions: dict[str, EntryFilterDecision] | None = None,
) -> str: ) -> str:
"""Create HTML for the search results. """Create HTML for the search results.
@ -1246,6 +1679,7 @@ def create_html_for_feed( # noqa: C901, PLR0914
reader: The Reader instance to use. reader: The Reader instance to use.
entries: The entries to create HTML for. entries: The entries to create HTML for.
current_feed_url: The feed URL currently being viewed in /feed. current_feed_url: The feed URL currently being viewed in /feed.
entry_decisions: Optional preview decisions keyed by feed URL and entry id.
Returns: Returns:
str: The HTML for the search results. str: The HTML for the search results.
@ -1268,12 +1702,22 @@ def create_html_for_feed( # noqa: C901, PLR0914
if entry.published: if entry.published:
published: str = entry.published.strftime("%Y-%m-%d %H:%M:%S") published: str = entry.published.strftime("%Y-%m-%d %H:%M:%S")
decision: EntryFilterDecision | None = None
if entry_decisions is not None:
decision = entry_decisions.get(get_entry_decision_key(entry))
is_blacklisted: bool = entry_is_blacklisted(entry, reader=reader)
is_whitelisted: bool = entry_is_whitelisted(entry, reader=reader)
if decision is not None:
is_blacklisted = decision.blacklist_match is not None
is_whitelisted = decision.whitelist_match is not None
blacklisted: str = "" blacklisted: str = ""
if entry_is_blacklisted(entry, reader=reader): if is_blacklisted:
blacklisted = "<span class='badge bg-danger'>Blacklisted</span>" blacklisted = "<span class='badge bg-danger'>Blacklisted</span>"
whitelisted: str = "" whitelisted: str = ""
if entry_is_whitelisted(entry, reader=reader): if is_whitelisted:
whitelisted = "<span class='badge bg-success'>Whitelisted</span>" whitelisted = "<span class='badge bg-success'>Whitelisted</span>"
source_feed_url: str = getattr(entry, "original_feed_url", None) or entry.feed.url source_feed_url: str = getattr(entry, "original_feed_url", None) or entry.feed.url

View file

@ -64,3 +64,127 @@ body {
overflow-wrap: anywhere; overflow-wrap: anywhere;
word-break: break-word; word-break: break-word;
} }
/* Filter pages (blacklist / whitelist): rule form column and live preview pane. */

/* Form card fills the height of its column. */
.filter-page__sidebar {
height: 100%;
}
/* Example block: keeps line breaks, wraps long lines anywhere. */
.filter-page__example {
white-space: pre-wrap;
overflow-wrap: anywhere;
color: #d8d8d8;
font-size: 0.9rem;
}
/* Decision list: capped at 48% of the viewport height and scrolls vertically. */
.filter-preview__list {
max-height: 48vh;
overflow-y: auto;
padding-right: 0.25rem;
}
/* min-width: 0 presumably lets this flex child shrink so long titles can wrap. */
.filter-preview__content {
min-width: 0;
}
/* Long link text may break at any point. */
.filter-preview__link {
overflow-wrap: anywhere;
}
/* Minimum width for the status badge. */
.filter-preview__status {
min-width: 5.5rem;
}
/* Bordered inline chip showing the filter pattern text. */
.filter-preview__pattern {
display: inline-flex;
align-items: center;
padding: 0.15rem 0.5rem;
border: 1px solid #2f2f2f;
background: #1b1b1b;
color: #d7d7d7;
overflow-wrap: anywhere;
}
/* Rendered entries: vertical stack spaced by gap instead of per-child bottom margins. */
.filter-preview__rendered {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
.filter-preview__rendered>div {
margin-bottom: 0;
}
/* Per-entry field table: bordered column of rows with no gap between them. */
.filter-preview__field-table {
display: flex;
flex-direction: column;
gap: 0;
border: 1px solid #2f2f2f;
background: #1b1b1b;
}
/* Three columns: field name | value (takes remaining width) | badges. */
.filter-preview__field-row {
display: grid;
grid-template-columns: minmax(5.5rem, 6.75rem) minmax(0, 1fr) auto;
gap: 0.75rem;
align-items: start;
padding: 0.55rem 0.75rem;
border-bottom: 1px solid #2a2a2a;
}
/* No separator line under the last row. */
.filter-preview__field-row:last-child {
border-bottom: 0;
}
/* Small uppercase field label. */
.filter-preview__field-name {
color: #d8d8d8;
font-size: 0.78rem;
font-weight: 600;
letter-spacing: 0.04em;
text-transform: uppercase;
}
/* Field value text; long words may break. */
.filter-preview__field-value {
color: #bfbfbf;
overflow-wrap: anywhere;
word-break: break-word;
}
/* Badges wrap and align to the right edge of the row. */
.filter-preview__field-badges {
display: flex;
flex-wrap: wrap;
justify-content: flex-end;
gap: 0.25rem;
}
/* Base style for highlighted match text. */
.filter-preview__match {
padding: 0 0.15rem;
border-radius: 0.15rem;
}
/* "danger" modifier: red highlight. */
.filter-preview__match--danger {
background: #652020;
color: #ffe2e2;
}
/* "success" modifier: green highlight. */
.filter-preview__match--success {
background: #1e5330;
color: #def8e5;
}
/* Up to 767.98px wide: field rows collapse to one column, badges align left. */
@media (max-width: 767.98px) {
.filter-preview__field-row {
grid-template-columns: 1fr;
gap: 0.35rem;
}
.filter-preview__field-badges {
justify-content: flex-start;
}
}
/* From 992px wide: the form card sticks 1rem below the top of the viewport. */
@media (min-width: 992px) {
.filter-page__sidebar {
position: sticky;
top: 1rem;
}
}

View file

@ -0,0 +1,86 @@
<div class="d-flex flex-column gap-4 filter-preview">
<div class="d-flex flex-column flex-md-row justify-content-between align-items-start gap-3">
<div>
<h3 class="h5 mb-1">Live preview</h3>
<p class="text-muted mb-0">Latest {{ preview_limit }} entries from {{ feed.title or feed.url }}</p>
</div>
<div class="d-flex flex-wrap gap-2">
<span class="badge bg-secondary">{{ preview_summary.total }} checked</span>
<span class="badge bg-success">{{ preview_summary.sent }} sent</span>
<span class="badge bg-danger">{{ preview_summary.skipped }} skipped</span>
<span class="badge bg-warning text-dark">{{ preview_summary.blacklist_matches }} blacklist match{{ 'es' if preview_summary.blacklist_matches != 1 else '' }}</span>
<span class="badge bg-info text-dark">{{ preview_summary.whitelist_matches }} whitelist match{{ 'es' if preview_summary.whitelist_matches != 1 else '' }}</span>
</div>
</div>
<p class="text-muted small mb-0">{{ preview_helper_text }}</p>
<section>
<div class="d-flex flex-wrap justify-content-between align-items-center gap-2 mb-3">
<h4 class="h6 text-uppercase text-muted mb-0">Decision list</h4>
<span class="text-muted small">Updates as you type. Saving is still manual.</span>
</div>
{% if preview_rows %}
<div class="d-flex flex-column gap-2 filter-preview__list">
{% for row in preview_rows %}
<article class="p-3 border border-dark rounded-0 filter-preview__item">
<div class="d-flex flex-column flex-md-row justify-content-between align-items-start gap-3 mb-2">
<div class="filter-preview__content">
<h5 class="h6 mb-1">
{% if row.entry.link %}
<a class="text-muted text-decoration-none filter-preview__link"
href="{{ row.entry.link }}">{{ row.entry.title or row.entry.id }}</a>
{% else %}
<span class="text-light">{{ row.entry.title or row.entry.id }}</span>
{% endif %}
</h5>
<p class="text-muted small mb-0">
{% if row.entry.author %}By {{ row.entry.author }} |{% endif %}
{{ row.published_label }}
</p>
</div>
<span class="badge bg-{{ row.status_class }} filter-preview__status">{{ row.status_label }}</span>
</div>
<p class="mb-2">{{ row.decision.reason }}</p>
<div class="d-flex flex-wrap gap-2 align-items-center small">
{% if row.decision.blacklist_match %}
<span class="badge bg-danger">{{ row.decision.blacklist_match.description }}</span>
<span class="filter-preview__pattern">{{ row.decision.blacklist_match.pattern }}</span>
{% endif %}
{% if row.decision.whitelist_match %}
<span class="badge bg-success">{{ row.decision.whitelist_match.description }}</span>
<span class="filter-preview__pattern">{{ row.decision.whitelist_match.pattern }}</span>
{% endif %}
</div>
<div class="filter-preview__field-table mt-2">
{% for field in row.field_rows %}
<section class="filter-preview__field-row">
<div class="filter-preview__field-name">{{ field.label }}</div>
<div class="filter-preview__field-value">{{ field.value_html|safe }}</div>
<div class="filter-preview__field-badges">
{% for badge in field.badges %}<span class="badge bg-{{ badge.class }}">{{ badge.label }}</span>{% endfor %}
</div>
</section>
{% endfor %}
</div>
</article>
{% endfor %}
</div>
{% else %}
<div class="p-3 border border-dark rounded-0">
<p class="text-muted mb-0">No entries are available yet for this feed, so there is nothing to preview.</p>
</div>
{% endif %}
</section>
<section>
<div class="d-flex flex-wrap justify-content-between align-items-center gap-2 mb-3">
<h4 class="h6 text-uppercase text-muted mb-0">Rendered entries</h4>
<span class="text-muted small">Uses the same entry rendering as the feed page.</span>
</div>
{% if preview_html %}
<div class="filter-preview__rendered">{{ preview_html|safe }}</div>
{% else %}
<div class="p-3 border border-dark rounded-0">
<p class="text-muted mb-0">Rendered preview will appear here when entries are available.</p>
</div>
{% endif %}
</section>
</div>

View file

@ -1,98 +1,126 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block title %} {% block title %}
| Blacklist | Blacklist
{% endblock title %} {% endblock title %}
{% block content %} {% block content %}
<div class="p-2 border border-dark"> <div class="row g-3 filter-page">
<form action="/blacklist" method="post"> <div class="col-lg-5">
<!-- Feed URL --> <section class="card border border-dark shadow-sm text-light rounded-0 filter-page__sidebar">
<div class="row pb-2"> <div class="card-body p-3 p-md-4">
<div class="col-sm-12"> <div class="mb-4">
<div class="form-text"> <h2 class="h4 mb-2">Blacklist Rules</h2>
<ul class="list-inline"> <p class="text-muted mb-3">
<li> Build block rules on the left and watch the latest feed entries update on the right before you save.
Comma separated list of words to blacklist. If a word is found in the </p>
corresponding blacklists, the feed will not be sent. <div class="p-3 border border-dark rounded-0 small text-muted">
</li> <p class="mb-2">
<li>Whitelist always takes precedence over blacklist. Leave empty to disable.</li> Use comma-separated terms or snippets for quick blocking. Use regex when the pattern is more specific.
<li>Words are case-insensitive. No spaces should be used before or after the comma.</li> </p>
<li> <p class="mb-2">
Correct: Plain text matching is case-insensitive and partial, so <code>orld</code> matches <code>World of Warcraft</code>.
<code> </p>
primogem,events,gameplay preview,special program <p class="mb-2">Whitelist matches still win. If an entry matches both, the preview keeps it as sent.</p>
</code> <p class="mb-0">Keep the left side for editing and the right side for checking what gets removed.</p>
</li> </div>
<li>
Wrong:
<code>
primogem, events, gameplay preview, special program
</code>
</li>
</ul>
</div>
<label for="blacklist_title" class="col-sm-6 col-form-label">Blacklist - Title</label>
<input name="blacklist_title" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_title" value="{%- if blacklist_title -%}{{ blacklist_title }}{%- endif -%}" />
<label for="blacklist_summary" class="col-sm-6 col-form-label">Blacklist - Summary</label>
<input name="blacklist_summary" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_summary" value="{%- if blacklist_summary -%}{{ blacklist_summary }}{%- endif -%}" />
<label for="blacklist_content" class="col-sm-6 col-form-label">Blacklist - Content</label>
<input name="blacklist_content" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_content" value="{%- if blacklist_content -%}{{ blacklist_content }}{%- endif -%}" />
<label for="blacklist_author" class="col-sm-6 col-form-label">Blacklist - Author</label>
<input name="blacklist_author" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_author" value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
<div class="mt-4">
<div class="form-text">
<ul class="list-inline">
<li>
Regular expression patterns for advanced filtering. Each pattern should be on a new
line.
</li>
<li>Patterns are case-insensitive.</li>
<li>
Examples:
<code>
<pre>
^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*
</pre>
</code>
</li>
</ul>
</div> </div>
<label for="regex_blacklist_title" class="col-sm-6 col-form-label">Regex Blacklist - Title</label> <form action="/blacklist"
<textarea name="regex_blacklist_title" class="form-control bg-dark border-dark text-muted" method="post"
id="regex_blacklist_title" class="row g-3"
rows="3">{%- if regex_blacklist_title -%}{{ regex_blacklist_title }}{%- endif -%}</textarea> hx-get="/blacklist_preview"
hx-target="#filter-preview"
<label for="regex_blacklist_summary" class="col-sm-6 col-form-label">Regex Blacklist - hx-swap="innerHTML"
Summary</label> hx-trigger="input delay:400ms, change delay:200ms">
<textarea name="regex_blacklist_summary" class="form-control bg-dark border-dark text-muted" <input type="hidden" name="feed_url" value="{{ feed.url }}" />
id="regex_blacklist_summary" <div class="col-12">
rows="3">{%- if regex_blacklist_summary -%}{{ regex_blacklist_summary }}{%- endif -%}</textarea> <h3 class="h6 text-uppercase text-muted mb-3">Word Rules</h3>
<div class="p-3 border border-dark rounded-0 form-text mb-3">
<label for="regex_blacklist_content" class="col-sm-6 col-form-label">Regex Blacklist - <p class="mb-2">Comma separated terms or snippets. Spaces around commas are ignored.</p>
Content</label> <p class="mb-0">
<textarea name="regex_blacklist_content" class="form-control bg-dark border-dark text-muted" Example:
id="regex_blacklist_content" <code>primogem,events,orld,special program</code>
rows="3">{%- if regex_blacklist_content -%}{{ regex_blacklist_content }}{%- endif -%}</textarea> </p>
</div>
<label for="regex_blacklist_author" class="col-sm-6 col-form-label">Regex Blacklist - Author</label> <label for="blacklist_title" class="form-label">Block if title contains</label>
<textarea name="regex_blacklist_author" class="form-control bg-dark border-dark text-muted" <input name="blacklist_title"
id="regex_blacklist_author" type="text"
rows="3">{%- if regex_blacklist_author -%}{{ regex_blacklist_author }}{%- endif -%}</textarea> class="form-control bg-dark border-dark text-muted"
id="blacklist_title"
value="{{ blacklist_title }}" />
</div>
<div class="col-12">
<label for="blacklist_summary" class="form-label">Block if summary contains</label>
<input name="blacklist_summary"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_summary"
value="{{ blacklist_summary }}" />
</div>
<div class="col-12">
<label for="blacklist_content" class="form-label">Block if content contains</label>
<input name="blacklist_content"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_content"
value="{{ blacklist_content }}" />
</div>
<div class="col-12">
<label for="blacklist_author" class="form-label">Block if author contains</label>
<input name="blacklist_author"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_author"
value="{{ blacklist_author }}" />
</div>
<div class="col-12 pt-2">
<h3 class="h6 text-uppercase text-muted mb-3">Regex Rules</h3>
<div class="p-3 border border-dark rounded-0 form-text mb-3">
<p class="mb-2">One pattern per line. Matching is case-insensitive.</p>
<pre class="mb-0 filter-page__example">^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*</pre>
</div>
<label for="regex_blacklist_title" class="form-label">Block if title matches regex</label>
<textarea name="regex_blacklist_title"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_title"
rows="3">{{ regex_blacklist_title }}</textarea>
</div>
<div class="col-12">
<label for="regex_blacklist_summary" class="form-label">Block if summary matches regex</label>
<textarea name="regex_blacklist_summary"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_summary"
rows="3">{{ regex_blacklist_summary }}</textarea>
</div>
<div class="col-12">
<label for="regex_blacklist_content" class="form-label">Block if content matches regex</label>
<textarea name="regex_blacklist_content"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_content"
rows="3">{{ regex_blacklist_content }}</textarea>
</div>
<div class="col-12">
<label for="regex_blacklist_author" class="form-label">Block if author matches regex</label>
<textarea name="regex_blacklist_author"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_author"
rows="3">{{ regex_blacklist_author }}</textarea>
</div>
<div class="col-12 d-flex flex-wrap gap-2 pt-2">
<button class="btn btn-dark btn-sm" type="submit">Update blacklist</button>
<a class="btn btn-outline-light btn-sm"
href="/feed?feed_url={{ feed.url|encode_url }}">Back to feed</a>
</div>
</form>
</div> </div>
</div> </section>
</div> </div>
<!-- Add a hidden feed_url field to the form --> <div class="col-lg-7">
<input type="hidden" name="feed_url" value="{{ feed.url }}" /> <section class="card border border-dark shadow-sm text-light rounded-0 h-100">
<!-- Submit button --> <div class="card-body p-3 p-md-4">
<div class="d-md-flex"> <div id="filter-preview">{% include "_filter_preview.html" %}</div>
<button class="btn btn-dark btn-sm">Update blacklist</button> </div>
</section>
</div> </div>
</form> </div>
</div>
{% endblock content %} {% endblock content %}

View file

@ -1,98 +1,124 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block title %} {% block title %}
| Whitelist | Whitelist
{% endblock title %} {% endblock title %}
{% block content %} {% block content %}
<div class="p-2 border border-dark"> <div class="row g-3 filter-page">
<form action="/whitelist" method="post"> <div class="col-lg-5">
<!-- Feed URL --> <section class="card border border-dark shadow-sm text-light rounded-0 filter-page__sidebar">
<div class="row pb-2"> <div class="card-body p-3 p-md-4">
<div class="col-sm-12"> <div class="mb-4">
<div class="form-text"> <h2 class="h4 mb-2">Whitelist Rules</h2>
<ul class="list-inline"> <p class="text-muted mb-3">
<li> Shape what is allowed through, and use the live pane to see which entries are the only ones that will still be sent.
Comma separated list of words to whitelist. Only send message to </p>
Discord if one of these words are present in the corresponding fields. <div class="p-3 border border-dark rounded-0 small text-muted">
</li> <p class="mb-2">Whitelist rules are restrictive. If any whitelist rule exists, entries must match it to be sent.</p>
<li>Whitelist always takes precedence over blacklist. Leave empty to disable.</li> <p class="mb-2">
<li>Words are case-insensitive. No spaces should be used before or after the comma.</li> Plain text matching is case-insensitive and partial, so <code>orld</code> matches <code>World of Warcraft</code>.
<li> </p>
Correct: <p class="mb-2">When an entry matches both lists, whitelist still wins and the preview shows it as sent.</p>
<code> <p class="mb-0">Saved blacklist rules remain active while you preview whitelist edits.</p>
primogem,events,gameplay preview,special program </div>
</code>
</li>
<li>
Wrong:
<code>
primogem, events, gameplay preview, special program
</code>
</li>
</ul>
</div>
<label for="whitelist_title" class="col-sm-6 col-form-label">Whitelist - Title</label>
<input name="whitelist_title" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_title" value="{%- if whitelist_title -%}{{ whitelist_title }} {%- endif -%}" />
<label for="whitelist_summary" class="col-sm-6 col-form-label">Whitelist - Summary</label>
<input name="whitelist_summary" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_summary" value="{%- if whitelist_summary -%}{{ whitelist_summary }}{%- endif -%}" />
<label for="whitelist_content" class="col-sm-6 col-form-label">Whitelist - Content</label>
<input name="whitelist_content" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_content" value="{%- if whitelist_content -%}{{ whitelist_content }}{%- endif -%}" />
<label for="whitelist_author" class="col-sm-6 col-form-label">Whitelist - Author</label>
<input name="whitelist_author" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_author" value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
<div class="mt-4">
<div class="form-text">
<ul class="list-inline">
<li>
Regular expression patterns for advanced filtering. Each pattern should be on a new
line.
</li>
<li>Patterns are case-insensitive.</li>
<li>
Examples:
<code>
<pre>
^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*
</pre>
</code>
</li>
</ul>
</div> </div>
<label for="regex_whitelist_title" class="col-sm-6 col-form-label">Regex Whitelist - Title</label> <form action="/whitelist"
<textarea name="regex_whitelist_title" class="form-control bg-dark border-dark text-muted" method="post"
id="regex_whitelist_title" class="row g-3"
rows="3">{%- if regex_whitelist_title -%}{{ regex_whitelist_title }}{%- endif -%}</textarea> hx-get="/whitelist_preview"
hx-target="#filter-preview"
<label for="regex_whitelist_summary" class="col-sm-6 col-form-label">Regex Whitelist - hx-swap="innerHTML"
Summary</label> hx-trigger="input delay:400ms, change delay:200ms">
<textarea name="regex_whitelist_summary" class="form-control bg-dark border-dark text-muted" <input type="hidden" name="feed_url" value="{{ feed.url }}" />
id="regex_whitelist_summary" <div class="col-12">
rows="3">{%- if regex_whitelist_summary -%}{{ regex_whitelist_summary }}{%- endif -%}</textarea> <h3 class="h6 text-uppercase text-muted mb-3">Word Rules</h3>
<div class="p-3 border border-dark rounded-0 form-text mb-3">
<label for="regex_whitelist_content" class="col-sm-6 col-form-label">Regex Whitelist - <p class="mb-2">Comma separated terms or snippets. Spaces around commas are ignored.</p>
Content</label> <p class="mb-0">
<textarea name="regex_whitelist_content" class="form-control bg-dark border-dark text-muted" Example:
id="regex_whitelist_content" <code>primogem,events,orld,special program</code>
rows="3">{%- if regex_whitelist_content -%}{{ regex_whitelist_content }}{%- endif -%}</textarea> </p>
</div>
<label for="regex_whitelist_author" class="col-sm-6 col-form-label">Regex Whitelist - Author</label> <label for="whitelist_title" class="form-label">Allow if title contains</label>
<textarea name="regex_whitelist_author" class="form-control bg-dark border-dark text-muted" <input name="whitelist_title"
id="regex_whitelist_author" type="text"
rows="3">{%- if regex_whitelist_author -%}{{ regex_whitelist_author }}{%- endif -%}</textarea> class="form-control bg-dark border-dark text-muted"
id="whitelist_title"
value="{{ whitelist_title }}" />
</div>
<div class="col-12">
<label for="whitelist_summary" class="form-label">Allow if summary contains</label>
<input name="whitelist_summary"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_summary"
value="{{ whitelist_summary }}" />
</div>
<div class="col-12">
<label for="whitelist_content" class="form-label">Allow if content contains</label>
<input name="whitelist_content"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_content"
value="{{ whitelist_content }}" />
</div>
<div class="col-12">
<label for="whitelist_author" class="form-label">Allow if author contains</label>
<input name="whitelist_author"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_author"
value="{{ whitelist_author }}" />
</div>
<div class="col-12 pt-2">
<h3 class="h6 text-uppercase text-muted mb-3">Regex Rules</h3>
<div class="p-3 border border-dark rounded-0 form-text mb-3">
<p class="mb-2">One pattern per line. Matching is case-insensitive.</p>
<pre class="mb-0 filter-page__example">^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*</pre>
</div>
<label for="regex_whitelist_title" class="form-label">Allow if title matches regex</label>
<textarea name="regex_whitelist_title"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_title"
rows="3">{{ regex_whitelist_title }}</textarea>
</div>
<div class="col-12">
<label for="regex_whitelist_summary" class="form-label">Allow if summary matches regex</label>
<textarea name="regex_whitelist_summary"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_summary"
rows="3">{{ regex_whitelist_summary }}</textarea>
</div>
<div class="col-12">
<label for="regex_whitelist_content" class="form-label">Allow if content matches regex</label>
<textarea name="regex_whitelist_content"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_content"
rows="3">{{ regex_whitelist_content }}</textarea>
</div>
<div class="col-12">
<label for="regex_whitelist_author" class="form-label">Allow if author matches regex</label>
<textarea name="regex_whitelist_author"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_author"
rows="3">{{ regex_whitelist_author }}</textarea>
</div>
<div class="col-12 d-flex flex-wrap gap-2 pt-2">
<button class="btn btn-dark btn-sm" type="submit">Update whitelist</button>
<a class="btn btn-outline-light btn-sm"
href="/feed?feed_url={{ feed.url|encode_url }}">Back to feed</a>
</div>
</form>
</div> </div>
</div> </section>
</div> </div>
<!-- Add a hidden feed_url field to the form --> <div class="col-lg-7">
<input type="hidden" name="feed_url" value="{{ feed.url }}" /> <section class="card border border-dark shadow-sm text-light rounded-0 h-100">
<!-- Submit button --> <div class="card-body p-3 p-md-4">
<div class="d-md-flex"> <div id="filter-preview">{% include "_filter_preview.html" %}</div>
<button class="btn btn-dark btn-sm">Update whitelist</button> </div>
</section>
</div> </div>
</form> </div>
</div>
{% endblock content %} {% endblock content %}

View file

@ -11,6 +11,8 @@ from reader import make_reader
from discord_rss_bot.filter.blacklist import entry_should_be_skipped from discord_rss_bot.filter.blacklist import entry_should_be_skipped
from discord_rss_bot.filter.blacklist import feed_has_blacklist_tags from discord_rss_bot.filter.blacklist import feed_has_blacklist_tags
from discord_rss_bot.filter.evaluator import evaluate_entry_filters
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
if TYPE_CHECKING: if TYPE_CHECKING:
from collections.abc import Iterable from collections.abc import Iterable
@ -203,3 +205,54 @@ def test_regex_should_be_skipped() -> None:
) )
reader.delete_tag(feed, "regex_blacklist_author") reader.delete_tag(feed, "regex_blacklist_author")
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}" assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
def test_whitelist_match_overrides_blacklist_match() -> None:
    """Check that an entry hit by both a blacklist and a whitelist rule is still sent.

    The same title rule is stored under ``blacklist_title`` and ``whitelist_title``;
    the evaluated decision must report both matches, keep ``should_send`` true and
    mention the override in its reason.
    """
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry yielded for the feed is used by this test.
    head: Entry | None = next(iter(reader.get_entries(feed=feed)), None)
    first_entry: list[Entry] = [] if head is None else [head]
    assert len(first_entry) == 1, f"First entry should be added: {first_entry}"

    # Identical rule on both sides so that both filters hit the same entry.
    reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd")  # pyright: ignore[reportArgumentType]
    reader.set_tag(feed, "whitelist_title", "fvnnnfnfdnfdnfd")  # pyright: ignore[reportArgumentType]

    blacklist_rules = get_filter_values_from_reader(reader, feed, "blacklist")
    whitelist_rules = get_filter_values_from_reader(reader, feed, "whitelist")
    decision = evaluate_entry_filters(
        first_entry[0],
        blacklist_values=blacklist_rules,
        whitelist_values=whitelist_rules,
    )

    assert decision.should_send is True, "Whitelist match should override blacklist match"
    assert decision.blacklist_match is not None, "Expected a blacklist match"
    assert decision.whitelist_match is not None, "Expected a whitelist match"
    assert "whitelist overrides blacklist" in decision.reason
def test_blacklist_substring_match_on_title() -> None:
    """Check that a plain-text ``blacklist_title`` rule causes the entry to be skipped.

    The rule ``"vnnnfn"`` is presumably a fragment of the first entry's title in the
    test feed (the title itself is not visible here).
    """
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry yielded for the feed is used by this test.
    head: Entry | None = next(iter(reader.get_entries(feed=feed)), None)
    first_entry: list[Entry] = [] if head is None else [head]
    assert len(first_entry) == 1, f"First entry should be added: {first_entry}"

    reader.set_tag(feed, "blacklist_title", "vnnnfn")  # pyright: ignore[reportArgumentType]

    skipped = entry_should_be_skipped(reader, first_entry[0])
    assert skipped is True, "Substring title match should blacklist the entry"

View file

@ -37,6 +37,15 @@ def encoded_feed_url(url: str) -> str:
return urllib.parse.quote(feed_url) if url else "" return urllib.parse.quote(feed_url) if url else ""
def ensure_preview_feed_exists() -> Reader:
    """Return the app's reader after trying to add and update the test feed.

    Both steps are best-effort: any exception raised by either call is suppressed
    independently, so a failure in one does not prevent the other from running.

    Returns:
        Reader: The reader obtained from ``get_reader_dependency()``.
    """
    preview_reader: Reader = get_reader_dependency()

    # Each step gets its own suppress block so they fail independently.
    for method_name in ("add_feed", "update_feed"):
        with contextlib.suppress(Exception):
            getattr(preview_reader, method_name)(feed_url)

    return preview_reader
def test_search() -> None: def test_search() -> None:
"""Test the /search page.""" """Test the /search page."""
# Remove the feed if it already exists before we run the test. # Remove the feed if it already exists before we run the test.
@ -221,6 +230,221 @@ def test_get() -> None:
assert response.status_code == 200, f"/whitelist failed: {response.text}" assert response.status_code == 200, f"/whitelist failed: {response.text}"
def test_blacklist_page_uses_live_preview_layout() -> None:
    """Check that ``/blacklist`` renders the preview hook, preview container and rules heading."""
    ensure_preview_feed_exists()

    query: dict[str, str] = {"feed_url": encoded_feed_url(feed_url)}
    response: Response = client.get(url="/blacklist", params=query)
    assert response.status_code == 200, f"/blacklist failed: {response.text}"

    page: str = response.text
    assert 'hx-get="/blacklist_preview"' in page
    assert 'id="filter-preview"' in page
    assert "Blacklist Rules" in page
def test_whitelist_page_uses_live_preview_layout() -> None:
    """Check that ``/whitelist`` renders the preview hook, preview container and rules heading."""
    ensure_preview_feed_exists()

    query: dict[str, str] = {"feed_url": encoded_feed_url(feed_url)}
    response: Response = client.get(url="/whitelist", params=query)
    assert response.status_code == 200, f"/whitelist failed: {response.text}"

    page: str = response.text
    assert 'hx-get="/whitelist_preview"' in page
    assert 'id="filter-preview"' in page
    assert "Whitelist Rules" in page
def test_blacklist_preview_does_not_persist_unsaved_rules() -> None:
    """Check that previewing a different blacklist rule leaves the stored tag untouched.

    A ``blacklist_title`` tag is saved first, then ``/blacklist_preview`` is requested
    with another value; afterwards the stored tag must still hold the saved value.
    """
    reader: Reader = ensure_preview_feed_exists()
    reader.set_tag(feed_url, "blacklist_title", "saved-blacklist")  # pyright: ignore[reportArgumentType]

    try:
        unsaved_rules: dict[str, str] = {
            "feed_url": feed_url,
            "blacklist_title": "fvnnnfnfdnfdnfd",
        }
        response: Response = client.get(url="/blacklist_preview", params=unsaved_rules)
        assert response.status_code == 200, f"/blacklist_preview failed: {response.text}"
        assert "Live preview" in response.text

        stored_rule = reader.get_tag(feed_url, "blacklist_title", "")
        assert stored_rule == "saved-blacklist"
    finally:
        # Best-effort cleanup of the tag set above.
        with contextlib.suppress(Exception):
            reader.delete_tag(feed_url, "blacklist_title")
def test_whitelist_preview_shows_precedence_over_blacklist() -> None:
    """Check that the whitelist preview reports an override of a saved blacklist rule.

    A ``blacklist_title`` tag is saved, then ``/whitelist_preview`` is requested with
    the same value as an unsaved whitelist rule; the response must contain the
    override reason and the "Sent" label.
    """
    reader: Reader = ensure_preview_feed_exists()
    reader.set_tag(feed_url, "blacklist_title", "fvnnnfnfdnfdnfd")  # pyright: ignore[reportArgumentType]

    try:
        preview_rules: dict[str, str] = {
            "feed_url": feed_url,
            "whitelist_title": "fvnnnfnfdnfdnfd",
        }
        response: Response = client.get(url="/whitelist_preview", params=preview_rules)
        assert response.status_code == 200, f"/whitelist_preview failed: {response.text}"

        page: str = response.text
        assert "whitelist overrides blacklist" in page
        assert "Sent" in page
    finally:
        # Best-effort cleanup of the tag set above.
        with contextlib.suppress(Exception):
            reader.delete_tag(feed_url, "blacklist_title")
def test_blacklist_preview_uses_50_entry_limit() -> None:
    """Check that ``/blacklist_preview`` asks the reader for 50 entries.

    A stub reader holding 60 entries replaces the real reader dependency and records
    the ``limit`` keyword it receives. ``create_html_for_feed`` is patched out while
    the request is made.
    """

    @dataclass(slots=True)
    class DummyContent:
        value: str

    @dataclass(slots=True)
    class DummyFeed:
        url: str
        title: str

    @dataclass(slots=True)
    class DummyEntry:
        id: str
        feed: DummyFeed
        title: str
        summary: str
        author: str
        link: str
        published: datetime | None
        content: list[DummyContent] = field(default_factory=lambda: [DummyContent("content")])

    class StubReader:
        def __init__(self) -> None:
            self.feed = DummyFeed(url="https://example.com/filter-preview.xml", title="Preview Feed")
            self.recorded_limit: int | None = None

            def build_entry(index: int) -> Entry:
                # The cast only satisfies the type checker; the object stays a DummyEntry.
                return cast(
                    "Entry",
                    DummyEntry(
                        id=f"entry-{index}",
                        feed=self.feed,
                        title=f"Entry {index}",
                        summary=f"Summary {index}",
                        author="Author",
                        link=f"https://example.com/entry-{index}",
                        published=datetime(2024, 1, 1, tzinfo=UTC),
                    ),
                )

            # More entries than the limit the test expects, so truncation is observable.
            self.entries: list[Entry] = [build_entry(index) for index in range(60)]

        def get_feed(self, _feed_url: str) -> DummyFeed:
            return self.feed

        def get_entries(self, **kwargs: object) -> list[Entry]:
            limit = kwargs.get("limit")
            if not isinstance(limit, int):
                # No usable limit was passed: record that and return everything.
                self.recorded_limit = None
                return self.entries
            self.recorded_limit = limit
            return self.entries[:limit]

        def get_tag(self, _resource: object, _key: str, default: object = None) -> object:
            return default

    stub_reader = StubReader()
    app.dependency_overrides[get_reader_dependency] = lambda: stub_reader

    try:
        with patch("discord_rss_bot.main.create_html_for_feed", return_value="<div>Rendered</div>"):
            response: Response = client.get(
                url="/blacklist_preview",
                params={"feed_url": stub_reader.feed.url},
            )

        assert response.status_code == 200, f"/blacklist_preview failed: {response.text}"
        assert stub_reader.recorded_limit == 50, (
            f"Expected preview to request 50 entries, got {stub_reader.recorded_limit}"
        )
        assert "50 checked" in response.text
    finally:
        # Drop the stub so later tests do not see it.
        app.dependency_overrides = {}
def test_blacklist_preview_shows_labeled_field_values_for_substring_match() -> None:
    """Check the preview markup for an entry skipped by a substring title rule.

    A stub reader with a single "World of Warcraft" entry replaces the real reader
    dependency and ``create_html_for_feed`` is patched out. The unsaved rule
    ``"orld"`` is sent to ``/blacklist_preview`` and the response is checked for the
    field labels, the highlighted match and the entry's text values.
    """

    @dataclass(slots=True)
    class DummyContent:
        value: str

    @dataclass(slots=True)
    class DummyFeed:
        url: str
        title: str

    @dataclass(slots=True)
    class DummyEntry:
        id: str
        feed: DummyFeed
        title: str
        summary: str
        author: str
        link: str
        published: datetime | None
        content: list[DummyContent] = field(default_factory=list)

    class StubReader:
        def __init__(self) -> None:
            self.feed = DummyFeed(url="https://example.com/wow.xml", title="Warcraft Feed")
            only_entry = DummyEntry(
                id="wow-1",
                feed=self.feed,
                title="World of Warcraft",
                summary="<p>Massive MMO news update</p>",
                author="Blizzard",
                link="https://example.com/wow-1",
                published=datetime(2024, 1, 1, tzinfo=UTC),
                content=[DummyContent("<p>The expansion launches soon.</p>")],
            )
            # The cast only satisfies the type checker; the object stays a DummyEntry.
            self.entries: list[Entry] = [cast("Entry", only_entry)]

        def get_feed(self, _feed_url: str) -> DummyFeed:
            return self.feed

        def get_entries(self, **_kwargs: object) -> list[Entry]:
            return self.entries

        def get_tag(self, _resource: object, _key: str, default: object = None) -> object:
            return default

    stub_reader = StubReader()
    app.dependency_overrides[get_reader_dependency] = lambda: stub_reader

    try:
        with patch("discord_rss_bot.main.create_html_for_feed", return_value="<div>Rendered</div>"):
            response: Response = client.get(
                url="/blacklist_preview",
                params={
                    "feed_url": stub_reader.feed.url,
                    "blacklist_title": "orld",
                },
            )

        assert response.status_code == 200, f"/blacklist_preview failed: {response.text}"

        page: str = response.text
        # Decision label and entry title.
        assert "Skipped" in page
        assert "World of Warcraft" in page
        # Field labels.
        assert "Title" in page
        assert "Author" in page
        assert "Description" in page
        assert "Content" in page
        # Preview markup, including the highlighted substring.
        assert "filter-preview__field-row" in page
        assert "filter-preview__match" in page
        assert '<mark class="filter-preview__match filter-preview__match--danger">orld</mark>' in page
        # Summary and content text, without the surrounding <p> tags.
        assert "Massive MMO news update" in page
        assert "The expansion launches soon." in page
    finally:
        # Drop the stub so later tests do not see it.
        app.dependency_overrides = {}
def test_settings_page_shows_screenshot_layout_setting() -> None: def test_settings_page_shows_screenshot_layout_setting() -> None:
response: Response = client.get(url="/settings") response: Response = client.get(url="/settings")
assert response.status_code == 200, f"/settings failed: {response.text}" assert response.status_code == 200, f"/settings failed: {response.text}"

View file

@ -12,6 +12,8 @@ def test_is_word_in_text() -> None:
assert is_word_in_text("word1,word2", "This is a sample text containing word1.") is True, msg_true assert is_word_in_text("word1,word2", "This is a sample text containing word1.") is True, msg_true
assert is_word_in_text("word1,word2", "This is a sample text containing word2.") is True, msg_true assert is_word_in_text("word1,word2", "This is a sample text containing word2.") is True, msg_true
assert is_word_in_text("word1,word2", "This is a sample text containing WORD1 and WORD2.") is True, msg_true assert is_word_in_text("word1,word2", "This is a sample text containing WORD1 and WORD2.") is True, msg_true
assert is_word_in_text("orld", "World of Warcraft") is True, msg_true
assert is_word_in_text(" orld , craft ", "World of Warcraft") is True, msg_true
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false

View file

@ -9,6 +9,8 @@ from reader import Feed
from reader import Reader from reader import Reader
from reader import make_reader from reader import make_reader
from discord_rss_bot.filter.evaluator import evaluate_entry_filters
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.filter.whitelist import has_white_tags from discord_rss_bot.filter.whitelist import has_white_tags
from discord_rss_bot.filter.whitelist import should_be_sent from discord_rss_bot.filter.whitelist import should_be_sent
@ -184,3 +186,54 @@ def test_regex_should_be_sent() -> None:
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns" assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns"
reader.delete_tag(feed, "regex_whitelist_author") reader.delete_tag(feed, "regex_whitelist_author")
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent" assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
def test_active_whitelist_blocks_non_matching_blacklisted_entry() -> None:
    """Check that an entry is skipped when a whitelist rule exists but does not match.

    A blacklist title rule and a whitelist title rule with a different value are
    stored; the decision must report the blacklist match, no whitelist match,
    ``should_send`` false and a reason naming the missing whitelist match.
    """
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry yielded for the feed is used by this test.
    head: Entry | None = next(iter(reader.get_entries(feed=feed)), None)
    first_entry: list[Entry] = [] if head is None else [head]
    assert len(first_entry) == 1, "First entry should be added"

    reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd")  # pyright: ignore[reportArgumentType]
    reader.set_tag(feed, "whitelist_title", "does-not-match")  # pyright: ignore[reportArgumentType]

    blacklist_rules = get_filter_values_from_reader(reader, feed, "blacklist")
    whitelist_rules = get_filter_values_from_reader(reader, feed, "whitelist")
    decision = evaluate_entry_filters(
        first_entry[0],
        blacklist_values=blacklist_rules,
        whitelist_values=whitelist_rules,
    )

    assert decision.should_send is False, "Entry should be skipped when whitelist is active but does not match"
    assert decision.blacklist_match is not None, "Expected a blacklist match"
    assert decision.whitelist_match is None, "Expected whitelist to miss"
    assert "no whitelist rule matched" in decision.reason
def test_whitelist_substring_match_on_title() -> None:
    """Check that a plain-text ``whitelist_title`` rule causes the entry to be sent.

    The rule ``"vnnnfn"`` is presumably a fragment of the first entry's title in the
    test feed (the title itself is not visible here).
    """
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry yielded for the feed is used by this test.
    head: Entry | None = next(iter(reader.get_entries(feed=feed)), None)
    first_entry: list[Entry] = [] if head is None else [head]
    assert len(first_entry) == 1, "First entry should be added"

    reader.set_tag(feed, "whitelist_title", "vnnnfn")  # pyright: ignore[reportArgumentType]

    sent = should_be_sent(reader, first_entry[0])
    assert sent is True, "Substring title match should whitelist the entry"