Add live preview to blacklist and whitelist
All checks were successful
Test and build Docker image / docker (push) Successful in 1m58s

This commit is contained in:
Joakim Hellsén 2026-04-27 18:27:05 +02:00
commit 6a3bba5b69
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
15 changed files with 1539 additions and 370 deletions

View file

@ -4,10 +4,7 @@ import urllib.parse
from functools import lru_cache
from typing import TYPE_CHECKING
from discord_rss_bot.filter.blacklist import entry_should_be_skipped
from discord_rss_bot.filter.blacklist import feed_has_blacklist_tags
from discord_rss_bot.filter.whitelist import has_white_tags
from discord_rss_bot.filter.whitelist import should_be_sent
from discord_rss_bot.filter.evaluator import get_entry_filter_decision_from_reader
if TYPE_CHECKING:
from reader import Entry
@ -41,7 +38,7 @@ def entry_is_whitelisted(entry_to_check: Entry, reader: Reader) -> bool:
bool: True if the feed is whitelisted, False otherwise.
"""
return bool(has_white_tags(reader, entry_to_check.feed) and should_be_sent(reader, entry_to_check))
return get_entry_filter_decision_from_reader(reader, entry_to_check).whitelist_match is not None
def entry_is_blacklisted(entry_to_check: Entry, reader: Reader) -> bool:
@ -55,6 +52,4 @@ def entry_is_blacklisted(entry_to_check: Entry, reader: Reader) -> bool:
bool: True if the feed is blacklisted, False otherwise.
"""
return bool(
feed_has_blacklist_tags(reader, entry_to_check.feed) and entry_should_be_skipped(reader, entry_to_check),
)
return get_entry_filter_decision_from_reader(reader, entry_to_check).blacklist_match is not None

View file

@ -37,9 +37,7 @@ from discord_rss_bot.custom_message import CustomEmbed
from discord_rss_bot.custom_message import get_custom_message
from discord_rss_bot.custom_message import replace_tags_in_embed
from discord_rss_bot.custom_message import replace_tags_in_text_message
from discord_rss_bot.filter.blacklist import entry_should_be_skipped
from discord_rss_bot.filter.whitelist import has_white_tags
from discord_rss_bot.filter.whitelist import should_be_sent
from discord_rss_bot.filter.evaluator import get_entry_filter_decision_from_reader
from discord_rss_bot.hoyolab_api import create_hoyolab_webhook
from discord_rss_bot.hoyolab_api import extract_post_id_from_hoyolab_url
from discord_rss_bot.hoyolab_api import fetch_hoyolab_post
@ -711,14 +709,9 @@ def send_to_discord(reader: Reader | None = None, feed: Feed | None = None, *, d
use_default_message_on_empty=True,
)
# Check if the entry is blacklisted, and if it is, we will skip it.
if entry_should_be_skipped(effective_reader, entry):
logger.info("Entry was blacklisted: %s", entry.id)
continue
# Check if the feed has a whitelist, and if it does, check if the entry is whitelisted.
if has_white_tags(effective_reader, entry.feed) and not should_be_sent(effective_reader, entry):
logger.info("Entry was not whitelisted: %s", entry.id)
decision = get_entry_filter_decision_from_reader(effective_reader, entry)
if not decision.should_send:
logger.info("Entry was skipped: %s (%s)", entry.id, decision.reason)
continue
# Use a custom webhook for Hoyolab feeds.

View file

@ -2,8 +2,9 @@ from __future__ import annotations
from typing import TYPE_CHECKING
from discord_rss_bot.filter.utils import is_regex_match
from discord_rss_bot.filter.utils import is_word_in_text
from discord_rss_bot.filter.evaluator import find_filter_match
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.filter.evaluator import has_filter_values
if TYPE_CHECKING:
from reader import Entry
@ -31,29 +32,10 @@ def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool:
Returns:
bool: If the feed has any of the tags.
"""
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip()
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip()
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip()
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip()
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip()
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip()
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip()
return bool(
blacklist_title
or blacklist_author
or blacklist_content
or blacklist_summary
or regex_blacklist_author
or regex_blacklist_content
or regex_blacklist_summary
or regex_blacklist_title,
)
return has_filter_values(get_filter_values_from_reader(reader, feed, "blacklist"))
def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool:
"""Return True if the entry is in the blacklist.
Args:
@ -63,58 +45,4 @@ def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0
Returns:
bool: If the entry is in the blacklist.
"""
feed = entry.feed
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip()
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip()
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip()
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip()
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip()
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip()
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip()
# TODO(TheLovinator): Also add support for entry_text and more.
# Check regular blacklist
if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title):
return True
if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary):
return True
if (
entry.content
and entry.content[0].value
and blacklist_content
and is_word_in_text(blacklist_content, entry.content[0].value)
):
return True
if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author):
return True
if (
entry.content
and entry.content[0].value
and blacklist_content
and is_word_in_text(blacklist_content, entry.content[0].value)
):
return True
# Check regex blacklist
if entry.title and regex_blacklist_title and is_regex_match(regex_blacklist_title, entry.title):
return True
if entry.summary and regex_blacklist_summary and is_regex_match(regex_blacklist_summary, entry.summary):
return True
if (
entry.content
and entry.content[0].value
and regex_blacklist_content
and is_regex_match(regex_blacklist_content, entry.content[0].value)
):
return True
if entry.author and regex_blacklist_author and is_regex_match(regex_blacklist_author, entry.author):
return True
return bool(
entry.content
and entry.content[0].value
and regex_blacklist_content
and is_regex_match(regex_blacklist_content, entry.content[0].value),
)
return bool(find_filter_match(entry, get_filter_values_from_reader(reader, entry.feed, "blacklist"), "blacklist"))

View file

@ -0,0 +1,271 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
from discord_rss_bot.filter.utils import is_regex_match
from discord_rss_bot.filter.utils import is_word_in_text
if TYPE_CHECKING:
from collections.abc import Mapping
from reader import Entry
from reader import Feed
from reader import Reader
# Entry fields a filter rule can target. Iteration order matters: find_filter_match
# walks this tuple in order and returns the first rule that matches.
FILTER_FIELDS: tuple[str, str, str, str] = ("title", "summary", "content", "author")
# Maps a field key ("title", "regex_title", ...) to its stripped rule string.
FilterValues = dict[str, str]
@dataclass(frozen=True, slots=True)
class FilterMatch:
filter_name: str
field_name: str
match_type: str
pattern: str
@property
def description(self) -> str:
field_label: str = self.field_name.replace("_", " ")
return f"{self.filter_name} {self.match_type} match on {field_label}"
@dataclass(frozen=True, slots=True)
class EntryFilterDecision:
    """Outcome of evaluating one entry against blacklist and whitelist rules."""

    # True when the entry should be delivered, False when it should be skipped.
    should_send: bool
    # Human-readable explanation of the decision (used in logs).
    reason: str
    # First blacklist rule that matched, or None when none matched.
    blacklist_match: FilterMatch | None
    # First whitelist rule that matched, or None when none matched.
    whitelist_match: FilterMatch | None
    # Whether any blacklist rule is configured (non-empty after stripping).
    has_blacklist_filters: bool
    # Whether any whitelist rule is configured (non-empty after stripping).
    has_whitelist_filters: bool
def get_filter_values_from_reader(reader: Reader, feed: Feed, filter_name: str) -> FilterValues:
    """Load a feed's saved filter rules from its reader tags.

    For every field in ``FILTER_FIELDS`` the tags ``{filter_name}_{field}`` and
    ``regex_{filter_name}_{field}`` are read (defaulting to an empty string),
    converted with ``str()`` and stripped.

    Args:
        reader: The reader instance.
        feed: The feed whose filter tags should be loaded.
        filter_name: Either blacklist or whitelist.

    Returns:
        FilterValues: Rules keyed by ``{field}`` and ``regex_{field}``.
    """

    def read_tag(tag_name: str) -> str:
        # Missing tags fall back to "" so callers always get a string.
        return str(reader.get_tag(feed, tag_name, "")).strip()

    loaded: FilterValues = {}
    for field in FILTER_FIELDS:
        loaded.update(
            {
                field: read_tag(f"{filter_name}_{field}"),
                f"regex_{field}": read_tag(f"regex_{filter_name}_{field}"),
            },
        )
    return loaded
def coerce_filter_values(filter_name: str, values: Mapping[str, str] | None = None) -> FilterValues:
    """Normalize a raw rule mapping into the ``{field}`` / ``regex_{field}`` shape.

    Each rule is looked up first under its form-style key
    (``{filter_name}_{field}`` or ``regex_{filter_name}_{field}``) and then
    under the already-normalized key (``{field}`` or ``regex_{field}``).
    Missing rules become empty strings; every value is stringified and stripped.

    Args:
        filter_name: Either blacklist or whitelist.
        values: Optional raw mapping of form or saved values.

    Returns:
        FilterValues: A mapping with one text and one regex rule per field.
    """
    raw: Mapping[str, str] = values if values else {}

    def pick(prefixed_key: str, bare_key: str) -> str:
        # Prefer the form-style key; fall back to the normalized key, then "".
        return str(raw.get(prefixed_key, raw.get(bare_key, ""))).strip()

    coerced: FilterValues = {}
    for field in FILTER_FIELDS:
        coerced[field] = pick(f"{filter_name}_{field}", field)
        coerced[f"regex_{field}"] = pick(f"regex_{filter_name}_{field}", f"regex_{field}")
    return coerced
def has_filter_values(values: Mapping[str, str]) -> bool:
    """Tell whether at least one rule in the mapping is non-blank.

    Args:
        values: Filter values to inspect.

    Returns:
        bool: True as soon as one value is non-empty after stripping.
    """
    for raw_value in values.values():
        if str(raw_value).strip():
            return True
    return False
def get_entry_filter_decision_from_reader(reader: Reader, entry: Entry) -> EntryFilterDecision:
    """Evaluate an entry using the filter rules saved on its feed.

    Args:
        reader: The reader instance.
        entry: The entry to evaluate.

    Returns:
        EntryFilterDecision: Final decision plus match details.
    """
    feed = entry.feed
    saved_blacklist: FilterValues = get_filter_values_from_reader(reader, feed, "blacklist")
    saved_whitelist: FilterValues = get_filter_values_from_reader(reader, feed, "whitelist")
    return evaluate_entry_filters(
        entry,
        blacklist_values=saved_blacklist,
        whitelist_values=saved_whitelist,
    )
def evaluate_entry_filters(
    entry: Entry,
    *,
    blacklist_values: Mapping[str, str] | None = None,
    whitelist_values: Mapping[str, str] | None = None,
) -> EntryFilterDecision:
    """Decide whether one entry should be sent under the given filter rules.

    Precedence, in order:

    1. A whitelist match sends the entry, even if a blacklist rule also matches.
    2. If whitelist rules exist but none matched, the entry is skipped.
    3. Otherwise a blacklist match skips the entry.
    4. Otherwise the entry is sent.

    Args:
        entry: The entry to evaluate.
        blacklist_values: Blacklist values from saved tags or a form.
        whitelist_values: Whitelist values from saved tags or a form.

    Returns:
        EntryFilterDecision: Final decision plus match details.
    """
    blacklist_rules: FilterValues = coerce_filter_values("blacklist", blacklist_values)
    whitelist_rules: FilterValues = coerce_filter_values("whitelist", whitelist_values)
    blacklist_hit: FilterMatch | None = find_filter_match(entry, blacklist_rules, "blacklist")
    whitelist_hit: FilterMatch | None = find_filter_match(entry, whitelist_rules, "whitelist")
    blacklist_active: bool = has_filter_values(blacklist_rules)
    whitelist_active: bool = has_filter_values(whitelist_rules)

    # Work out the verdict once; the match details are attached identically below.
    verdict: tuple[bool, str]
    if whitelist_hit and blacklist_hit:
        verdict = (True, f"Sent because {whitelist_hit.description}; whitelist overrides blacklist.")
    elif whitelist_hit:
        verdict = (True, f"Sent because {whitelist_hit.description}.")
    elif whitelist_active and blacklist_hit:
        verdict = (False, f"Skipped because {blacklist_hit.description} and no whitelist rule matched.")
    elif whitelist_active:
        verdict = (False, "Skipped because no whitelist rule matched.")
    elif blacklist_hit:
        verdict = (False, f"Skipped because {blacklist_hit.description}.")
    else:
        verdict = (True, "Sent because no active filter blocked it.")

    should_send, reason = verdict
    return EntryFilterDecision(
        should_send=should_send,
        reason=reason,
        blacklist_match=blacklist_hit,
        whitelist_match=whitelist_hit,
        has_blacklist_filters=blacklist_active,
        has_whitelist_filters=whitelist_active,
    )
def find_filter_match(entry: Entry, values: Mapping[str, str], filter_name: str) -> FilterMatch | None:
    """Find the first rule in ``values`` that matches the entry.

    All plain-text rules are tried first (in ``FILTER_FIELDS`` order), then all
    regex rules (same order). Blank rules and empty entry fields never match.

    Args:
        entry: The entry to evaluate.
        values: Normalized filter values.
        filter_name: Either blacklist or whitelist.

    Returns:
        FilterMatch | None: The first matching rule, if any.
    """
    field_texts: dict[str, str] = get_entry_fields(entry)

    # (key prefix in ``values``, reported match type, matcher) - text pass before regex pass.
    passes = (
        ("", "text", is_word_in_text),
        ("regex_", "regex", is_regex_match),
    )
    for key_prefix, match_type, matcher in passes:
        for field_name in FILTER_FIELDS:
            rule: str = str(values.get(f"{key_prefix}{field_name}", "")).strip()
            haystack: str = field_texts[field_name]
            if not rule or not haystack:
                continue
            if matcher(rule, haystack):
                return FilterMatch(
                    filter_name=filter_name,
                    field_name=field_name,
                    match_type=match_type,
                    pattern=rule,
                )
    return None
def get_entry_fields(entry: Entry) -> dict[str, str]:
    """Collect the entry text that filters are matched against.

    Missing or empty attributes become empty strings. Only the first content
    item's value is used for the ``content`` field.

    Args:
        entry: The entry to inspect.

    Returns:
        dict[str, str]: Text for the title, summary, content and author fields.
    """
    first_content = entry.content[0].value if entry.content else ""
    return {
        "title": entry.title or "",
        "summary": entry.summary or "",
        "content": first_content or "",
        "author": entry.author or "",
    }
def get_entry_decision_key(entry: Entry) -> str:
    """Build the lookup key used to pair a preview decision with its entry.

    Args:
        entry: The entry to key.

    Returns:
        str: The feed URL and the entry id joined by ``|``.
    """
    feed_url = entry.feed.url
    entry_id = entry.id
    return f"{feed_url}|{entry_id}"

View file

@ -7,22 +7,22 @@ logger: logging.Logger = logging.getLogger(__name__)
def is_word_in_text(word_string: str, text: str) -> bool:
"""Check if any of the words are in the text.
"""Check if any comma-separated terms are in the text.
Args:
word_string: A comma-separated string of words to search for.
word_string: A comma-separated string of terms to search for.
text: The text to search in.
Returns:
bool: True if any word is found in the text, otherwise False.
bool: True if any term is found in the text, otherwise False.
"""
word_list: list[str] = word_string.split(",")
if not word_string or not text:
return False
# Compile regex patterns for each word.
patterns: list[re.Pattern[str]] = [re.compile(rf"(^|[^\w]){word}([^\w]|$)", re.IGNORECASE) for word in word_list]
normalized_text: str = text.casefold()
terms: list[str] = [term.strip().casefold() for term in word_string.split(",") if term.strip()]
# Check if any pattern matches the text.
return any(pattern.search(text) for pattern in patterns)
return any(term in normalized_text for term in terms)
def is_regex_match(regex_string: str, text: str) -> bool:

View file

@ -2,8 +2,9 @@ from __future__ import annotations
from typing import TYPE_CHECKING
from discord_rss_bot.filter.utils import is_regex_match
from discord_rss_bot.filter.utils import is_word_in_text
from discord_rss_bot.filter.evaluator import find_filter_match
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.filter.evaluator import has_filter_values
if TYPE_CHECKING:
from reader import Entry
@ -31,29 +32,10 @@ def has_white_tags(reader: Reader, feed: Feed) -> bool:
Returns:
bool: If the feed has any of the tags.
"""
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip()
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip()
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip()
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip()
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip()
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip()
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip()
return bool(
whitelist_title
or whitelist_author
or whitelist_content
or whitelist_summary
or regex_whitelist_author
or regex_whitelist_content
or regex_whitelist_summary
or regex_whitelist_title,
)
return has_filter_values(get_filter_values_from_reader(reader, feed, "whitelist"))
def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
def should_be_sent(reader: Reader, entry: Entry) -> bool:
"""Return True if the entry is in the whitelist.
Args:
@ -63,44 +45,4 @@ def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
Returns:
bool: If the entry is in the whitelist.
"""
feed: Feed = entry.feed
# Regular whitelist tags
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip()
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip()
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip()
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip()
# Regex whitelist tags
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip()
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip()
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip()
# Check regular whitelist
if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title):
return True
if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary):
return True
if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author):
return True
if (
entry.content
and entry.content[0].value
and whitelist_content
and is_word_in_text(whitelist_content, entry.content[0].value)
):
return True
# Check regex whitelist
if entry.title and regex_whitelist_title and is_regex_match(regex_whitelist_title, entry.title):
return True
if entry.summary and regex_whitelist_summary and is_regex_match(regex_whitelist_summary, entry.summary):
return True
if entry.author and regex_whitelist_author and is_regex_match(regex_whitelist_author, entry.author):
return True
return bool(
entry.content
and entry.content[0].value
and regex_whitelist_content
and is_regex_match(regex_whitelist_content, entry.content[0].value),
)
return bool(find_filter_match(entry, get_filter_values_from_reader(reader, entry.feed, "whitelist"), "whitelist"))

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import json
import logging
import logging.config
import re
import typing
import urllib.parse
from contextlib import asynccontextmanager
@ -10,6 +11,8 @@ from dataclasses import dataclass
from datetime import UTC
from datetime import datetime
from functools import lru_cache
from html import escape
from html import unescape
from typing import TYPE_CHECKING
from typing import Annotated
from typing import Any
@ -54,6 +57,14 @@ from discord_rss_bot.feeds import get_feed_delivery_mode
from discord_rss_bot.feeds import get_screenshot_layout
from discord_rss_bot.feeds import send_entry_to_discord
from discord_rss_bot.feeds import send_to_discord
from discord_rss_bot.filter.evaluator import FILTER_FIELDS
from discord_rss_bot.filter.evaluator import EntryFilterDecision
from discord_rss_bot.filter.evaluator import FilterMatch
from discord_rss_bot.filter.evaluator import coerce_filter_values
from discord_rss_bot.filter.evaluator import evaluate_entry_filters
from discord_rss_bot.filter.evaluator import get_entry_decision_key
from discord_rss_bot.filter.evaluator import get_entry_fields
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.git_backup import commit_state_change
from discord_rss_bot.git_backup import get_backup_path
from discord_rss_bot.is_url_valid import is_url_valid
@ -125,6 +136,15 @@ def has_webhooks() -> bool:
# Number of seconds in a minute, an hour and a day.
SECONDS_PER_MINUTE = 60
SECONDS_PER_HOUR = 3600
SECONDS_PER_DAY = 86400
# Maximum number of entries loaded for a blacklist/whitelist preview.
FILTER_PREVIEW_LIMIT = 50
# Display labels for the entry fields shown in the preview; note that the
# "summary" field is presented to the user as "Description".
PREVIEW_FIELD_LABELS: dict[str, str] = {
    "title": "Title",
    "author": "Author",
    "summary": "Description",
    "content": "Content",
}
# Matches anything shaped like an HTML tag ("<...>"); used to strip markup from preview text.
PREVIEW_HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
# Matches runs of whitespace so they can be collapsed into a single space.
PREVIEW_WHITESPACE_PATTERN = re.compile(r"\s+")
def relative_time(dt: datetime | None) -> str:
@ -459,19 +479,50 @@ async def get_whitelist(
"""
clean_feed_url: str = feed_url.strip()
feed: Feed = reader.get_feed(urllib.parse.unquote(clean_feed_url))
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", ""))
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", ""))
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", ""))
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", ""))
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", ""))
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", ""))
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", ""))
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", ""))
context = {
"request": request,
"feed": feed,
**build_filter_form_context("whitelist", get_filter_values_from_reader(reader, feed, "whitelist")),
**build_filter_preview_context(reader, feed, "whitelist"),
}
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
@app.get("/whitelist_preview", response_class=HTMLResponse)
async def get_whitelist_preview(
feed_url: str,
request: Request,
reader: Annotated[Reader, Depends(get_reader_dependency)],
whitelist_title: str = "",
whitelist_summary: str = "",
whitelist_content: str = "",
whitelist_author: str = "",
regex_whitelist_title: str = "",
regex_whitelist_summary: str = "",
regex_whitelist_content: str = "",
regex_whitelist_author: str = "",
) -> HTMLResponse:
"""Render the whitelist preview fragment for HTMX updates.
Args:
feed_url: Feed URL whose entries should be previewed.
request: The request object.
reader: The Reader instance.
whitelist_title: Word-based title whitelist.
whitelist_summary: Word-based summary whitelist.
whitelist_content: Word-based content whitelist.
whitelist_author: Word-based author whitelist.
regex_whitelist_title: Regex title whitelist.
regex_whitelist_summary: Regex summary whitelist.
regex_whitelist_content: Regex content whitelist.
regex_whitelist_author: Regex author whitelist.
Returns:
HTMLResponse: Rendered filter preview fragment.
"""
clean_feed_url: str = urllib.parse.unquote(feed_url.strip())
feed: Feed = reader.get_feed(clean_feed_url)
form_values: dict[str, str] = {
"whitelist_title": whitelist_title,
"whitelist_summary": whitelist_summary,
"whitelist_content": whitelist_content,
@ -481,7 +532,16 @@ async def get_whitelist(
"regex_whitelist_content": regex_whitelist_content,
"regex_whitelist_author": regex_whitelist_author,
}
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
return templates.TemplateResponse(
request=request,
name="_filter_preview.html",
context={
"request": request,
"feed": feed,
**build_filter_preview_context(reader, feed, "whitelist", form_values=form_values),
},
)
@app.post("/blacklist")
@ -548,18 +608,50 @@ async def get_blacklist(
"""
feed: Feed = reader.get_feed(urllib.parse.unquote(feed_url))
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", ""))
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", ""))
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", ""))
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", ""))
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", ""))
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", ""))
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", ""))
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", ""))
context = {
"request": request,
"feed": feed,
**build_filter_form_context("blacklist", get_filter_values_from_reader(reader, feed, "blacklist")),
**build_filter_preview_context(reader, feed, "blacklist"),
}
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
@app.get("/blacklist_preview", response_class=HTMLResponse)
async def get_blacklist_preview(
feed_url: str,
request: Request,
reader: Annotated[Reader, Depends(get_reader_dependency)],
blacklist_title: str = "",
blacklist_summary: str = "",
blacklist_content: str = "",
blacklist_author: str = "",
regex_blacklist_title: str = "",
regex_blacklist_summary: str = "",
regex_blacklist_content: str = "",
regex_blacklist_author: str = "",
) -> HTMLResponse:
"""Render the blacklist preview fragment for HTMX updates.
Args:
feed_url: Feed URL whose entries should be previewed.
request: The request object.
reader: The Reader instance.
blacklist_title: Word-based title blacklist.
blacklist_summary: Word-based summary blacklist.
blacklist_content: Word-based content blacklist.
blacklist_author: Word-based author blacklist.
regex_blacklist_title: Regex title blacklist.
regex_blacklist_summary: Regex summary blacklist.
regex_blacklist_content: Regex content blacklist.
regex_blacklist_author: Regex author blacklist.
Returns:
HTMLResponse: Rendered filter preview fragment.
"""
clean_feed_url: str = urllib.parse.unquote(feed_url.strip())
feed: Feed = reader.get_feed(clean_feed_url)
form_values: dict[str, str] = {
"blacklist_title": blacklist_title,
"blacklist_summary": blacklist_summary,
"blacklist_content": blacklist_content,
@ -569,7 +661,347 @@ async def get_blacklist(
"regex_blacklist_content": regex_blacklist_content,
"regex_blacklist_author": regex_blacklist_author,
}
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
return templates.TemplateResponse(
request=request,
name="_filter_preview.html",
context={
"request": request,
"feed": feed,
**build_filter_preview_context(reader, feed, "blacklist", form_values=form_values),
},
)
def build_filter_form_context(filter_name: str, values: dict[str, str]) -> dict[str, str]:
    """Translate normalized filter values into the key names the form templates use.

    Normalized keys (``title``, ``regex_title``, ...) are mapped to
    ``{filter_name}_title``, ``regex_{filter_name}_title`` and so on.

    Args:
        filter_name: Either blacklist or whitelist.
        values: Normalized filter values.

    Returns:
        dict[str, str]: Template keys matching current form field names.
    """
    form_context: dict[str, str] = {}
    for field in FILTER_FIELDS:
        text_key: str = f"{filter_name}_{field}"
        regex_key: str = f"regex_{filter_name}_{field}"
        form_context.update({text_key: values[field], regex_key: values[f"regex_{field}"]})
    return form_context
def build_filter_preview_context(
    reader: Reader,
    feed: Feed,
    filter_name: str,
    form_values: dict[str, str] | None = None,
) -> dict[str, Any]:
    """Build preview data for the blacklist and whitelist pages.

    Up to ``FILTER_PREVIEW_LIMIT`` entries of the feed are evaluated. The filter
    named by ``filter_name`` uses ``form_values`` when they are supplied; the
    other filter always uses its saved rules. When ``form_values`` is None
    (for example the initial page load) the saved rules are used for both
    filters, so the preview agrees with the values shown in the form.

    Args:
        reader: The Reader instance.
        feed: The feed being previewed.
        filter_name: Either blacklist or whitelist.
        form_values: Optional unsaved values from the current form.

    Returns:
        dict[str, Any]: Preview context for template rendering.
    """
    preview_blacklist_values: dict[str, str] = get_filter_values_from_reader(reader, feed, "blacklist")
    preview_whitelist_values: dict[str, str] = get_filter_values_from_reader(reader, feed, "whitelist")

    editing_blacklist: bool = filter_name == "blacklist"
    if editing_blacklist:
        helper_text: str = "Saved whitelist rules still apply while previewing blacklist changes."
    else:
        helper_text = "Saved blacklist rules still apply while previewing whitelist changes."

    # Only override the saved rules when the caller actually passed form values.
    # Coercing None would yield all-empty rules and silently drop the saved
    # filter from the preview.
    if form_values is not None:
        if editing_blacklist:
            preview_blacklist_values = coerce_filter_values("blacklist", form_values)
        else:
            preview_whitelist_values = coerce_filter_values("whitelist", form_values)

    preview_entries: list[Entry] = list(reader.get_entries(feed=feed, limit=FILTER_PREVIEW_LIMIT))
    preview_rows: list[dict[str, Any]] = []
    preview_decisions: dict[str, EntryFilterDecision] = {}
    sent_count = 0
    skipped_count = 0
    blacklist_match_count = 0
    whitelist_match_count = 0

    for entry in preview_entries:
        decision: EntryFilterDecision = evaluate_entry_filters(
            entry,
            blacklist_values=preview_blacklist_values,
            whitelist_values=preview_whitelist_values,
        )
        preview_decisions[get_entry_decision_key(entry)] = decision

        if decision.should_send:
            sent_count += 1
        else:
            skipped_count += 1
        if decision.blacklist_match:
            blacklist_match_count += 1
        if decision.whitelist_match:
            whitelist_match_count += 1

        published_label: str = "Unknown date"
        if entry.published:
            published_label = entry.published.strftime("%Y-%m-%d %H:%M:%S")

        preview_rows.append(
            {
                "entry": entry,
                "decision": decision,
                "field_rows": build_preview_field_rows(entry, decision),
                "published_label": published_label,
                "status_label": "Sent" if decision.should_send else "Skipped",
                "status_class": "success" if decision.should_send else "danger",
            },
        )

    preview_html: str = create_html_for_feed(
        reader=reader,
        entries=preview_entries,
        current_feed_url=feed.url,
        entry_decisions=preview_decisions,
    )

    return {
        "filter_name": filter_name,
        "filter_label": filter_name.title(),
        "preview_entries": preview_entries,
        "preview_rows": preview_rows,
        "preview_html": preview_html,
        "preview_limit": FILTER_PREVIEW_LIMIT,
        "preview_summary": {
            "total": len(preview_entries),
            "sent": sent_count,
            "skipped": skipped_count,
            "blacklist_matches": blacklist_match_count,
            "whitelist_matches": whitelist_match_count,
        },
        "preview_helper_text": helper_text,
    }
def build_preview_field_rows(entry: Entry, decision: EntryFilterDecision) -> list[dict[str, Any]]:
    """Build one labeled row per entry field for the filter preview.

    A row receives a badge and a highlight candidate for each of the decision's
    matches (blacklist first, then whitelist) that hit that row's field.

    Args:
        entry: Entry whose values should be shown.
        decision: The final decision for the entry.

    Returns:
        list[dict[str, Any]]: Labeled field rows for the preview template.
    """
    entry_fields: dict[str, str] = get_entry_fields(entry)

    # (match, badge label, CSS class) - blacklist is listed before whitelist.
    candidates = (
        (decision.blacklist_match, "Blacklist match", "danger"),
        (decision.whitelist_match, "Whitelist match", "success"),
    )

    rows: list[dict[str, Any]] = []
    for field_name in ("title", "author", "summary", "content"):
        hits = [
            (match, label, css_class)
            for match, label, css_class in candidates
            if match and match.field_name == field_name
        ]
        highlight_matches: list[tuple[FilterMatch, str]] = [(match, css_class) for match, _, css_class in hits]
        rows.append(
            {
                "label": PREVIEW_FIELD_LABELS[field_name],
                "value_html": format_preview_field_value(entry_fields[field_name], highlight_matches),
                "badges": [{"label": label, "class": css_class} for _, label, css_class in hits],
            },
        )
    return rows
def format_preview_field_value(
    value: str,
    matches: list[tuple[FilterMatch, str]],
    max_length: int = 280,
) -> str:
    """Render an entry field as escaped preview HTML with the match highlighted.

    The raw value is normalized to plain text, clipped to ``max_length`` and
    escaped. When a highlight span survives clipping, that part is wrapped in
    a ``<mark>`` element carrying the highlight class.

    Args:
        value: Raw field value from the entry.
        matches: Matching filters for this field and their display classes.
        max_length: Max number of characters to display.

    Returns:
        str: Preview HTML, or the literal text "No value" for an empty field.
    """
    text: str = normalize_preview_field_value(value)
    if not text:
        return "No value"

    span, css_class = get_preview_highlight_span(text, matches)
    shown, shown_span = clip_preview_value(text, span, max_length)

    if shown_span is not None and css_class is not None:
        begin, finish = shown_span
        # Each piece is escaped separately so the <mark> tags are the only raw markup emitted.
        pieces = [
            escape(shown[:begin]),
            f'<mark class="filter-preview__match filter-preview__match--{css_class}">',
            escape(shown[begin:finish]),
            "</mark>",
            escape(shown[finish:]),
        ]
        return "".join(pieces)

    return escape(shown)
def normalize_preview_field_value(value: str) -> str:
    """Reduce raw entry field content to a single line of plain text.

    Matches of ``PREVIEW_HTML_TAG_PATTERN`` are replaced by a space, HTML
    entities are unescaped, and every match of ``PREVIEW_WHITESPACE_PATTERN``
    is replaced by a single space before the result is stripped.

    Args:
        value: Raw field value.

    Returns:
        str: Plain-text preview value; an empty string for a falsy input.
    """
    if value:
        without_tags: str = PREVIEW_HTML_TAG_PATTERN.sub(" ", value)
        decoded: str = unescape(without_tags)
        return PREVIEW_WHITESPACE_PATTERN.sub(" ", decoded).strip()
    return ""
def get_preview_highlight_span(
    value: str,
    matches: list[tuple[FilterMatch, str]],
) -> tuple[tuple[int, int] | None, str | None]:
    """Pick the highlight that starts earliest in the preview field.

    Args:
        value: Normalized field value.
        matches: Matching filters and associated preview classes.

    Returns:
        tuple[tuple[int, int] | None, str | None]: The earliest span together
        with its highlight class, or ``(None, None)`` when no filter produces
        a span.
    """
    located = []
    for filter_match, css_class in matches:
        found_span = get_filter_match_span(value, filter_match)
        if found_span is not None:
            located.append((found_span, css_class))

    if not located:
        return None, None

    # min() keeps the first of equally-early spans, so ties go to the earlier entry in `matches`.
    return min(located, key=lambda item: item[0][0])
def get_filter_match_span(value: str, match: FilterMatch) -> tuple[int, int] | None:
    """Locate the text a filter matched inside a preview field.

    Filters whose ``match_type`` is ``"regex"`` are searched as regular
    expressions; every other match type is searched as plain text terms.

    Args:
        value: Normalized preview value.
        match: Matching filter metadata.

    Returns:
        tuple[int, int] | None: The first matching span if found.
    """
    span_finder = get_regex_match_span if match.match_type == "regex" else get_text_match_span
    return span_finder(value, match.pattern)
def get_text_match_span(value: str, pattern: str) -> tuple[int, int] | None:
"""Return the earliest case-insensitive substring span for comma-separated text terms."""
earliest_span: tuple[int, int] | None = None
for term in [part.strip() for part in pattern.split(",") if part.strip()]:
compiled_pattern = re.compile(re.escape(term), re.IGNORECASE)
match = compiled_pattern.search(value)
if match and (earliest_span is None or match.start() < earliest_span[0]):
earliest_span = match.span()
return earliest_span
def get_regex_match_span(value: str, pattern: str) -> tuple[int, int] | None:
    """Find the earliest non-empty regex hit among newline/comma-separated patterns.

    Args:
        value: Text to search.
        pattern: Raw regex filter text, split by ``split_regex_patterns``.

    Returns:
        tuple[int, int] | None: Span of the hit that starts first, or None
        when nothing usable matches.
    """
    best: tuple[int, int] | None = None
    for raw_pattern in split_regex_patterns(pattern):
        try:
            found = re.search(raw_pattern, value, re.IGNORECASE)
        except re.error:
            # Patterns that do not compile are ignored.
            continue

        # A zero-width match has nothing to highlight.
        if found is None or found.start() == found.end():
            continue

        candidate = found.span()
        if best is None or candidate[0] < best[0]:
            best = candidate

    return best
def split_regex_patterns(pattern: str) -> list[str]:
    """Break raw regex filter text into individual patterns.

    The text is split on newlines and then on commas; each piece is stripped
    of surrounding whitespace and empty pieces are dropped.

    Args:
        pattern: The raw regex pattern string.

    Returns:
        list[str]: A list of individual regex patterns, in input order.
    """
    return [
        piece.strip()
        for line in pattern.split("\n")
        for piece in line.split(",")
        if piece.strip()
    ]
def clip_preview_value(
value: str,
highlight_span: tuple[int, int] | None,
max_length: int,
) -> tuple[str, tuple[int, int] | None]:
"""Clip a preview value while keeping the highlighted match visible when possible.
Args:
value: The normalized preview value.
highlight_span: The span of the highlighted match within the value.
max_length: The maximum length of the clipped value.
Returns:
tuple[str, tuple[int, int] | None]: The clipped preview value and adjusted highlight
"""
if len(value) <= max_length:
return value, highlight_span
if highlight_span is None:
return f"{value[: max_length - 1].rstrip()}", None
match_start, match_end = highlight_span
window_start = max(0, match_start - (max_length // 3))
window_end = min(len(value), window_start + max_length)
if match_end > window_end:
window_end = min(len(value), match_end + (max_length // 3))
window_start = max(0, window_end - max_length)
clipped_value = value[window_start:window_end]
clipped_span = (match_start - window_start, match_end - window_start)
if window_start > 0:
clipped_value = f"{clipped_value}"
clipped_span = (clipped_span[0] + 1, clipped_span[1] + 1)
if window_end < len(value):
clipped_value = f"{clipped_value}"
return clipped_value, clipped_span
@app.post("/custom")
@ -1239,6 +1671,7 @@ def create_html_for_feed( # noqa: C901, PLR0914
reader: Reader,
entries: Iterable[Entry],
current_feed_url: str = "",
entry_decisions: dict[str, EntryFilterDecision] | None = None,
) -> str:
"""Create HTML for the search results.
@ -1246,6 +1679,7 @@ def create_html_for_feed( # noqa: C901, PLR0914
reader: The Reader instance to use.
entries: The entries to create HTML for.
current_feed_url: The feed URL currently being viewed in /feed.
entry_decisions: Optional preview decisions keyed by feed URL and entry id.
Returns:
str: The HTML for the search results.
@ -1268,12 +1702,22 @@ def create_html_for_feed( # noqa: C901, PLR0914
if entry.published:
published: str = entry.published.strftime("%Y-%m-%d %H:%M:%S")
decision: EntryFilterDecision | None = None
if entry_decisions is not None:
decision = entry_decisions.get(get_entry_decision_key(entry))
is_blacklisted: bool = entry_is_blacklisted(entry, reader=reader)
is_whitelisted: bool = entry_is_whitelisted(entry, reader=reader)
if decision is not None:
is_blacklisted = decision.blacklist_match is not None
is_whitelisted = decision.whitelist_match is not None
blacklisted: str = ""
if entry_is_blacklisted(entry, reader=reader):
if is_blacklisted:
blacklisted = "<span class='badge bg-danger'>Blacklisted</span>"
whitelisted: str = ""
if entry_is_whitelisted(entry, reader=reader):
if is_whitelisted:
whitelisted = "<span class='badge bg-success'>Whitelisted</span>"
source_feed_url: str = getattr(entry, "original_feed_url", None) or entry.feed.url

View file

@ -64,3 +64,127 @@ body {
overflow-wrap: anywhere;
word-break: break-word;
}
/* Filter page sidebar (the rule editor card): fill the full column height. */
.filter-page__sidebar {
height: 100%;
}
/* Example block (used on a <pre> of sample regexes): keep line breaks but allow wrapping. */
.filter-page__example {
white-space: pre-wrap;
overflow-wrap: anywhere;
color: #d8d8d8;
font-size: 0.9rem;
}
/* Decision list: cap the height at 48% of the viewport and scroll vertically inside it. */
.filter-preview__list {
max-height: 48vh;
overflow-y: auto;
padding-right: 0.25rem;
}
/* Entry heading column: min-width 0 presumably lets it shrink inside its flex parent so long text can wrap. */
.filter-preview__content {
min-width: 0;
}
/* Entry title link: break anywhere so long titles/ids do not overflow. */
.filter-preview__link {
overflow-wrap: anywhere;
}
/* Sent/Skipped badge: fixed minimum width so both labels occupy the same space. */
.filter-preview__status {
min-width: 5.5rem;
}
/* Chip showing the raw pattern that matched. */
.filter-preview__pattern {
display: inline-flex;
align-items: center;
padding: 0.15rem 0.5rem;
border: 1px solid #2f2f2f;
background: #1b1b1b;
color: #d7d7d7;
overflow-wrap: anywhere;
}
/* Rendered entries: stack vertically with a uniform gap between them. */
.filter-preview__rendered {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
/* Spacing comes from the gap above, so direct child blocks drop their own bottom margin. */
.filter-preview__rendered>div {
margin-bottom: 0;
}
/* Per-entry field table: bordered column of field rows with no gap between rows. */
.filter-preview__field-table {
display: flex;
flex-direction: column;
gap: 0;
border: 1px solid #2f2f2f;
background: #1b1b1b;
}
/* One field row: three columns - field name, flexible value, auto-sized badges. */
.filter-preview__field-row {
display: grid;
grid-template-columns: minmax(5.5rem, 6.75rem) minmax(0, 1fr) auto;
gap: 0.75rem;
align-items: start;
padding: 0.55rem 0.75rem;
border-bottom: 1px solid #2a2a2a;
}
/* The last row has no separator line beneath it. */
.filter-preview__field-row:last-child {
border-bottom: 0;
}
/* Field name: small, bold, uppercase label. */
.filter-preview__field-name {
color: #d8d8d8;
font-size: 0.78rem;
font-weight: 600;
letter-spacing: 0.04em;
text-transform: uppercase;
}
/* Field value: wrap long unbroken strings instead of overflowing the row. */
.filter-preview__field-value {
color: #bfbfbf;
overflow-wrap: anywhere;
word-break: break-word;
}
/* Badges column: wrap when needed and align to the right edge. */
.filter-preview__field-badges {
display: flex;
flex-wrap: wrap;
justify-content: flex-end;
gap: 0.25rem;
}
/* Base style for the <mark> element wrapping highlighted match text. */
.filter-preview__match {
padding: 0 0.15rem;
border-radius: 0.15rem;
}
/* Red highlight variant (the "danger" class, used for blacklist matches). */
.filter-preview__match--danger {
background: #652020;
color: #ffe2e2;
}
/* Green highlight variant (the "success" class, used for whitelist matches). */
.filter-preview__match--success {
background: #1e5330;
color: #def8e5;
}
/* Below 768px: collapse field rows to a single column and left-align the badges. */
@media (max-width: 767.98px) {
.filter-preview__field-row {
grid-template-columns: 1fr;
gap: 0.35rem;
}
.filter-preview__field-badges {
justify-content: flex-start;
}
}
/* From 992px up: keep the sidebar pinned near the top while the page scrolls. */
@media (min-width: 992px) {
.filter-page__sidebar {
position: sticky;
top: 1rem;
}
}

View file

@ -0,0 +1,86 @@
{# Live filter preview partial.
   Reads these context variables: preview_limit, feed, preview_summary (total, sent, skipped,
   blacklist_matches, whitelist_matches), preview_helper_text, preview_rows and preview_html.
   It is included inside the #filter-preview container of the blacklist and whitelist pages. #}
<div class="d-flex flex-column gap-4 filter-preview">
{# Header: which feed is previewed plus the summary counters. #}
<div class="d-flex flex-column flex-md-row justify-content-between align-items-start gap-3">
<div>
<h3 class="h5 mb-1">Live preview</h3>
<p class="text-muted mb-0">Latest {{ preview_limit }} entries from {{ feed.title or feed.url }}</p>
</div>
<div class="d-flex flex-wrap gap-2">
<span class="badge bg-secondary">{{ preview_summary.total }} checked</span>
<span class="badge bg-success">{{ preview_summary.sent }} sent</span>
<span class="badge bg-danger">{{ preview_summary.skipped }} skipped</span>
<span class="badge bg-warning text-dark">{{ preview_summary.blacklist_matches }} blacklist match{{ 'es' if preview_summary.blacklist_matches != 1 else '' }}</span>
<span class="badge bg-info text-dark">{{ preview_summary.whitelist_matches }} whitelist match{{ 'es' if preview_summary.whitelist_matches != 1 else '' }}</span>
</div>
</div>
<p class="text-muted small mb-0">{{ preview_helper_text }}</p>
{# Decision list: one card per previewed entry with its status, reason and matched patterns. #}
<section>
<div class="d-flex flex-wrap justify-content-between align-items-center gap-2 mb-3">
<h4 class="h6 text-uppercase text-muted mb-0">Decision list</h4>
<span class="text-muted small">Updates as you type. Saving is still manual.</span>
</div>
{% if preview_rows %}
<div class="d-flex flex-column gap-2 filter-preview__list">
{% for row in preview_rows %}
<article class="p-3 border border-dark rounded-0 filter-preview__item">
<div class="d-flex flex-column flex-md-row justify-content-between align-items-start gap-3 mb-2">
<div class="filter-preview__content">
<h5 class="h6 mb-1">
{# The title falls back to the entry id; it is only a link when the entry has one. #}
{% if row.entry.link %}
<a class="text-muted text-decoration-none filter-preview__link"
href="{{ row.entry.link }}">{{ row.entry.title or row.entry.id }}</a>
{% else %}
<span class="text-light">{{ row.entry.title or row.entry.id }}</span>
{% endif %}
</h5>
<p class="text-muted small mb-0">
{% if row.entry.author %}By {{ row.entry.author }} |{% endif %}
{{ row.published_label }}
</p>
</div>
<span class="badge bg-{{ row.status_class }} filter-preview__status">{{ row.status_label }}</span>
</div>
<p class="mb-2">{{ row.decision.reason }}</p>
<div class="d-flex flex-wrap gap-2 align-items-center small">
{% if row.decision.blacklist_match %}
<span class="badge bg-danger">{{ row.decision.blacklist_match.description }}</span>
<span class="filter-preview__pattern">{{ row.decision.blacklist_match.pattern }}</span>
{% endif %}
{% if row.decision.whitelist_match %}
<span class="badge bg-success">{{ row.decision.whitelist_match.description }}</span>
<span class="filter-preview__pattern">{{ row.decision.whitelist_match.pattern }}</span>
{% endif %}
</div>
<div class="filter-preview__field-table mt-2">
{% for field in row.field_rows %}
<section class="filter-preview__field-row">
<div class="filter-preview__field-name">{{ field.label }}</div>
{# value_html is emitted with |safe, so it must arrive already escaped by the code that builds it. #}
<div class="filter-preview__field-value">{{ field.value_html|safe }}</div>
<div class="filter-preview__field-badges">
{% for badge in field.badges %}<span class="badge bg-{{ badge.class }}">{{ badge.label }}</span>{% endfor %}
</div>
</section>
{% endfor %}
</div>
</article>
{% endfor %}
</div>
{% else %}
<div class="p-3 border border-dark rounded-0">
<p class="text-muted mb-0">No entries are available yet for this feed, so there is nothing to preview.</p>
</div>
{% endif %}
</section>
{# Rendered entries: pre-built HTML passed in as preview_html and emitted without escaping. #}
<section>
<div class="d-flex flex-wrap justify-content-between align-items-center gap-2 mb-3">
<h4 class="h6 text-uppercase text-muted mb-0">Rendered entries</h4>
<span class="text-muted small">Uses the same entry rendering as the feed page.</span>
</div>
{% if preview_html %}
<div class="filter-preview__rendered">{{ preview_html|safe }}</div>
{% else %}
<div class="p-3 border border-dark rounded-0">
<p class="text-muted mb-0">Rendered preview will appear here when entries are available.</p>
</div>
{% endif %}
</section>
</div>

View file

@ -1,98 +1,126 @@
{% extends "base.html" %}
{% block title %}
| Blacklist
| Blacklist
{% endblock title %}
{% block content %}
<div class="p-2 border border-dark">
<form action="/blacklist" method="post">
<!-- Feed URL -->
<div class="row pb-2">
<div class="col-sm-12">
<div class="form-text">
<ul class="list-inline">
<li>
Comma separated list of words to blacklist. If a word is found in the
corresponding blacklists, the feed will not be sent.
</li>
<li>Whitelist always takes precedence over blacklist. Leave empty to disable.</li>
<li>Words are case-insensitive. No spaces should be used before or after the comma.</li>
<li>
Correct:
<code>
primogem,events,gameplay preview,special program
</code>
</li>
<li>
Wrong:
<code>
primogem, events, gameplay preview, special program
</code>
</li>
</ul>
</div>
<label for="blacklist_title" class="col-sm-6 col-form-label">Blacklist - Title</label>
<input name="blacklist_title" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_title" value="{%- if blacklist_title -%}{{ blacklist_title }}{%- endif -%}" />
<label for="blacklist_summary" class="col-sm-6 col-form-label">Blacklist - Summary</label>
<input name="blacklist_summary" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_summary" value="{%- if blacklist_summary -%}{{ blacklist_summary }}{%- endif -%}" />
<label for="blacklist_content" class="col-sm-6 col-form-label">Blacklist - Content</label>
<input name="blacklist_content" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_content" value="{%- if blacklist_content -%}{{ blacklist_content }}{%- endif -%}" />
<label for="blacklist_author" class="col-sm-6 col-form-label">Blacklist - Author</label>
<input name="blacklist_author" type="text" class="form-control bg-dark border-dark text-muted"
id="blacklist_author" value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
<div class="mt-4">
<div class="form-text">
<ul class="list-inline">
<li>
Regular expression patterns for advanced filtering. Each pattern should be on a new
line.
</li>
<li>Patterns are case-insensitive.</li>
<li>
Examples:
<code>
<pre>
^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*
</pre>
</code>
</li>
</ul>
<div class="row g-3 filter-page">
<div class="col-lg-5">
<section class="card border border-dark shadow-sm text-light rounded-0 filter-page__sidebar">
<div class="card-body p-3 p-md-4">
<div class="mb-4">
<h2 class="h4 mb-2">Blacklist Rules</h2>
<p class="text-muted mb-3">
Build block rules on the left and watch the latest feed entries update on the right before you save.
</p>
<div class="p-3 border border-dark rounded-0 small text-muted">
<p class="mb-2">
Use comma-separated terms or snippets for quick blocking. Use regex when the pattern is more specific.
</p>
<p class="mb-2">
Plain text matching is case-insensitive and partial, so <code>orld</code> matches <code>World of Warcraft</code>.
</p>
<p class="mb-2">Whitelist matches still win. If an entry matches both, the preview keeps it as sent.</p>
<p class="mb-0">Keep the left side for editing and the right side for checking what gets removed.</p>
</div>
</div>
<label for="regex_blacklist_title" class="col-sm-6 col-form-label">Regex Blacklist - Title</label>
<textarea name="regex_blacklist_title" class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_title"
rows="3">{%- if regex_blacklist_title -%}{{ regex_blacklist_title }}{%- endif -%}</textarea>
<label for="regex_blacklist_summary" class="col-sm-6 col-form-label">Regex Blacklist -
Summary</label>
<textarea name="regex_blacklist_summary" class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_summary"
rows="3">{%- if regex_blacklist_summary -%}{{ regex_blacklist_summary }}{%- endif -%}</textarea>
<label for="regex_blacklist_content" class="col-sm-6 col-form-label">Regex Blacklist -
Content</label>
<textarea name="regex_blacklist_content" class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_content"
rows="3">{%- if regex_blacklist_content -%}{{ regex_blacklist_content }}{%- endif -%}</textarea>
<label for="regex_blacklist_author" class="col-sm-6 col-form-label">Regex Blacklist - Author</label>
<textarea name="regex_blacklist_author" class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_author"
rows="3">{%- if regex_blacklist_author -%}{{ regex_blacklist_author }}{%- endif -%}</textarea>
<form action="/blacklist"
method="post"
class="row g-3"
hx-get="/blacklist_preview"
hx-target="#filter-preview"
hx-swap="innerHTML"
hx-trigger="input delay:400ms, change delay:200ms">
<input type="hidden" name="feed_url" value="{{ feed.url }}" />
<div class="col-12">
<h3 class="h6 text-uppercase text-muted mb-3">Word Rules</h3>
<div class="p-3 border border-dark rounded-0 form-text mb-3">
<p class="mb-2">Comma separated terms or snippets. Spaces around commas are ignored.</p>
<p class="mb-0">
Example:
<code>primogem,events,orld,special program</code>
</p>
</div>
<label for="blacklist_title" class="form-label">Block if title contains</label>
<input name="blacklist_title"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_title"
value="{{ blacklist_title }}" />
</div>
<div class="col-12">
<label for="blacklist_summary" class="form-label">Block if summary contains</label>
<input name="blacklist_summary"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_summary"
value="{{ blacklist_summary }}" />
</div>
<div class="col-12">
<label for="blacklist_content" class="form-label">Block if content contains</label>
<input name="blacklist_content"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_content"
value="{{ blacklist_content }}" />
</div>
<div class="col-12">
<label for="blacklist_author" class="form-label">Block if author contains</label>
<input name="blacklist_author"
type="text"
class="form-control bg-dark border-dark text-muted"
id="blacklist_author"
value="{{ blacklist_author }}" />
</div>
<div class="col-12 pt-2">
<h3 class="h6 text-uppercase text-muted mb-3">Regex Rules</h3>
<div class="p-3 border border-dark rounded-0 form-text mb-3">
<p class="mb-2">One pattern per line. Matching is case-insensitive.</p>
<pre class="mb-0 filter-page__example">^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*</pre>
</div>
<label for="regex_blacklist_title" class="form-label">Block if title matches regex</label>
<textarea name="regex_blacklist_title"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_title"
rows="3">{{ regex_blacklist_title }}</textarea>
</div>
<div class="col-12">
<label for="regex_blacklist_summary" class="form-label">Block if summary matches regex</label>
<textarea name="regex_blacklist_summary"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_summary"
rows="3">{{ regex_blacklist_summary }}</textarea>
</div>
<div class="col-12">
<label for="regex_blacklist_content" class="form-label">Block if content matches regex</label>
<textarea name="regex_blacklist_content"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_content"
rows="3">{{ regex_blacklist_content }}</textarea>
</div>
<div class="col-12">
<label for="regex_blacklist_author" class="form-label">Block if author matches regex</label>
<textarea name="regex_blacklist_author"
class="form-control bg-dark border-dark text-muted"
id="regex_blacklist_author"
rows="3">{{ regex_blacklist_author }}</textarea>
</div>
<div class="col-12 d-flex flex-wrap gap-2 pt-2">
<button class="btn btn-dark btn-sm" type="submit">Update blacklist</button>
<a class="btn btn-outline-light btn-sm"
href="/feed?feed_url={{ feed.url|encode_url }}">Back to feed</a>
</div>
</form>
</div>
</div>
</section>
</div>
<!-- Add a hidden feed_url field to the form -->
<input type="hidden" name="feed_url" value="{{ feed.url }}" />
<!-- Submit button -->
<div class="d-md-flex">
<button class="btn btn-dark btn-sm">Update blacklist</button>
<div class="col-lg-7">
<section class="card border border-dark shadow-sm text-light rounded-0 h-100">
<div class="card-body p-3 p-md-4">
<div id="filter-preview">{% include "_filter_preview.html" %}</div>
</div>
</section>
</div>
</form>
</div>
</div>
{% endblock content %}

View file

@ -1,98 +1,124 @@
{% extends "base.html" %}
{% block title %}
| Whitelist
| Whitelist
{% endblock title %}
{% block content %}
<div class="p-2 border border-dark">
<form action="/whitelist" method="post">
<!-- Feed URL -->
<div class="row pb-2">
<div class="col-sm-12">
<div class="form-text">
<ul class="list-inline">
<li>
Comma separated list of words to whitelist. Only send message to
Discord if one of these words are present in the corresponding fields.
</li>
<li>Whitelist always takes precedence over blacklist. Leave empty to disable.</li>
<li>Words are case-insensitive. No spaces should be used before or after the comma.</li>
<li>
Correct:
<code>
primogem,events,gameplay preview,special program
</code>
</li>
<li>
Wrong:
<code>
primogem, events, gameplay preview, special program
</code>
</li>
</ul>
</div>
<label for="whitelist_title" class="col-sm-6 col-form-label">Whitelist - Title</label>
<input name="whitelist_title" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_title" value="{%- if whitelist_title -%}{{ whitelist_title }} {%- endif -%}" />
<label for="whitelist_summary" class="col-sm-6 col-form-label">Whitelist - Summary</label>
<input name="whitelist_summary" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_summary" value="{%- if whitelist_summary -%}{{ whitelist_summary }}{%- endif -%}" />
<label for="whitelist_content" class="col-sm-6 col-form-label">Whitelist - Content</label>
<input name="whitelist_content" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_content" value="{%- if whitelist_content -%}{{ whitelist_content }}{%- endif -%}" />
<label for="whitelist_author" class="col-sm-6 col-form-label">Whitelist - Author</label>
<input name="whitelist_author" type="text" class="form-control bg-dark border-dark text-muted"
id="whitelist_author" value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
<div class="mt-4">
<div class="form-text">
<ul class="list-inline">
<li>
Regular expression patterns for advanced filtering. Each pattern should be on a new
line.
</li>
<li>Patterns are case-insensitive.</li>
<li>
Examples:
<code>
<pre>
^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*
</pre>
</code>
</li>
</ul>
<div class="row g-3 filter-page">
<div class="col-lg-5">
<section class="card border border-dark shadow-sm text-light rounded-0 filter-page__sidebar">
<div class="card-body p-3 p-md-4">
<div class="mb-4">
<h2 class="h4 mb-2">Whitelist Rules</h2>
<p class="text-muted mb-3">
Shape what is allowed through, and use the live pane to see which entries are the only ones that will still be sent.
</p>
<div class="p-3 border border-dark rounded-0 small text-muted">
<p class="mb-2">Whitelist rules are restrictive. If any whitelist rule exists, entries must match it to be sent.</p>
<p class="mb-2">
Plain text matching is case-insensitive and partial, so <code>orld</code> matches <code>World of Warcraft</code>.
</p>
<p class="mb-2">When an entry matches both lists, whitelist still wins and the preview shows it as sent.</p>
<p class="mb-0">Saved blacklist rules remain active while you preview whitelist edits.</p>
</div>
</div>
<label for="regex_whitelist_title" class="col-sm-6 col-form-label">Regex Whitelist - Title</label>
<textarea name="regex_whitelist_title" class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_title"
rows="3">{%- if regex_whitelist_title -%}{{ regex_whitelist_title }}{%- endif -%}</textarea>
<label for="regex_whitelist_summary" class="col-sm-6 col-form-label">Regex Whitelist -
Summary</label>
<textarea name="regex_whitelist_summary" class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_summary"
rows="3">{%- if regex_whitelist_summary -%}{{ regex_whitelist_summary }}{%- endif -%}</textarea>
<label for="regex_whitelist_content" class="col-sm-6 col-form-label">Regex Whitelist -
Content</label>
<textarea name="regex_whitelist_content" class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_content"
rows="3">{%- if regex_whitelist_content -%}{{ regex_whitelist_content }}{%- endif -%}</textarea>
<label for="regex_whitelist_author" class="col-sm-6 col-form-label">Regex Whitelist - Author</label>
<textarea name="regex_whitelist_author" class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_author"
rows="3">{%- if regex_whitelist_author -%}{{ regex_whitelist_author }}{%- endif -%}</textarea>
<form action="/whitelist"
method="post"
class="row g-3"
hx-get="/whitelist_preview"
hx-target="#filter-preview"
hx-swap="innerHTML"
hx-trigger="input delay:400ms, change delay:200ms">
<input type="hidden" name="feed_url" value="{{ feed.url }}" />
<div class="col-12">
<h3 class="h6 text-uppercase text-muted mb-3">Word Rules</h3>
<div class="p-3 border border-dark rounded-0 form-text mb-3">
<p class="mb-2">Comma separated terms or snippets. Spaces around commas are ignored.</p>
<p class="mb-0">
Example:
<code>primogem,events,orld,special program</code>
</p>
</div>
<label for="whitelist_title" class="form-label">Allow if title contains</label>
<input name="whitelist_title"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_title"
value="{{ whitelist_title }}" />
</div>
<div class="col-12">
<label for="whitelist_summary" class="form-label">Allow if summary contains</label>
<input name="whitelist_summary"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_summary"
value="{{ whitelist_summary }}" />
</div>
<div class="col-12">
<label for="whitelist_content" class="form-label">Allow if content contains</label>
<input name="whitelist_content"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_content"
value="{{ whitelist_content }}" />
</div>
<div class="col-12">
<label for="whitelist_author" class="form-label">Allow if author contains</label>
<input name="whitelist_author"
type="text"
class="form-control bg-dark border-dark text-muted"
id="whitelist_author"
value="{{ whitelist_author }}" />
</div>
<div class="col-12 pt-2">
<h3 class="h6 text-uppercase text-muted mb-3">Regex Rules</h3>
<div class="p-3 border border-dark rounded-0 form-text mb-3">
<p class="mb-2">One pattern per line. Matching is case-insensitive.</p>
<pre class="mb-0 filter-page__example">^New Release:.*
\b(update|version|patch)\s+\d+\.\d+
.*\[(important|notice)\].*</pre>
</div>
<label for="regex_whitelist_title" class="form-label">Allow if title matches regex</label>
<textarea name="regex_whitelist_title"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_title"
rows="3">{{ regex_whitelist_title }}</textarea>
</div>
<div class="col-12">
<label for="regex_whitelist_summary" class="form-label">Allow if summary matches regex</label>
<textarea name="regex_whitelist_summary"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_summary"
rows="3">{{ regex_whitelist_summary }}</textarea>
</div>
<div class="col-12">
<label for="regex_whitelist_content" class="form-label">Allow if content matches regex</label>
<textarea name="regex_whitelist_content"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_content"
rows="3">{{ regex_whitelist_content }}</textarea>
</div>
<div class="col-12">
<label for="regex_whitelist_author" class="form-label">Allow if author matches regex</label>
<textarea name="regex_whitelist_author"
class="form-control bg-dark border-dark text-muted"
id="regex_whitelist_author"
rows="3">{{ regex_whitelist_author }}</textarea>
</div>
<div class="col-12 d-flex flex-wrap gap-2 pt-2">
<button class="btn btn-dark btn-sm" type="submit">Update whitelist</button>
<a class="btn btn-outline-light btn-sm"
href="/feed?feed_url={{ feed.url|encode_url }}">Back to feed</a>
</div>
</form>
</div>
</div>
</section>
</div>
<!-- Add a hidden feed_url field to the form -->
<input type="hidden" name="feed_url" value="{{ feed.url }}" />
<!-- Submit button -->
<div class="d-md-flex">
<button class="btn btn-dark btn-sm">Update whitelist</button>
<div class="col-lg-7">
<section class="card border border-dark shadow-sm text-light rounded-0 h-100">
<div class="card-body p-3 p-md-4">
<div id="filter-preview">{% include "_filter_preview.html" %}</div>
</div>
</section>
</div>
</form>
</div>
</div>
{% endblock content %}

View file

@ -11,6 +11,8 @@ from reader import make_reader
from discord_rss_bot.filter.blacklist import entry_should_be_skipped
from discord_rss_bot.filter.blacklist import feed_has_blacklist_tags
from discord_rss_bot.filter.evaluator import evaluate_entry_filters
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
if TYPE_CHECKING:
from collections.abc import Iterable
@ -203,3 +205,54 @@ def test_regex_should_be_skipped() -> None:
)
reader.delete_tag(feed, "regex_blacklist_author")
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
def test_whitelist_match_overrides_blacklist_match() -> None:
    """An entry hit by both lists must still be sent, with both matches reported."""
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry the reader yields is needed.
    newest: Entry | None = next(iter(reader.get_entries(feed=feed)), None)
    first_entry: list[Entry] = [] if newest is None else [newest]
    assert len(first_entry) == 1, f"First entry should be added: {first_entry}"

    # The same term goes into both lists so the entry triggers both
    # (the term presumably occurs in the test feed's first title).
    for tag_name in ("blacklist_title", "whitelist_title"):
        reader.set_tag(feed, tag_name, "fvnnnfnfdnfdnfd")  # pyright: ignore[reportArgumentType]

    blacklist_values = get_filter_values_from_reader(reader, feed, "blacklist")
    whitelist_values = get_filter_values_from_reader(reader, feed, "whitelist")
    decision = evaluate_entry_filters(
        first_entry[0],
        blacklist_values=blacklist_values,
        whitelist_values=whitelist_values,
    )

    assert decision.should_send is True, "Whitelist match should override blacklist match"
    assert decision.blacklist_match is not None, "Expected a blacklist match"
    assert decision.whitelist_match is not None, "Expected a whitelist match"
    assert "whitelist overrides blacklist" in decision.reason
def test_blacklist_substring_match_on_title() -> None:
    """A plain-text blacklist term should blacklist an entry when it is only part of the title."""
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry the reader yields is needed.
    newest: Entry | None = next(iter(reader.get_entries(feed=feed)), None)
    first_entry: list[Entry] = [] if newest is None else [newest]
    assert len(first_entry) == 1, f"First entry should be added: {first_entry}"

    # "vnnnfn" is a fragment of the term used by the other blacklist tests
    # (presumably a fragment of the first entry's title).
    reader.set_tag(feed, "blacklist_title", "vnnnfn")  # pyright: ignore[reportArgumentType]

    skipped: bool = entry_should_be_skipped(reader, first_entry[0])
    assert skipped is True, "Substring title match should blacklist the entry"

View file

@ -37,6 +37,15 @@ def encoded_feed_url(url: str) -> str:
return urllib.parse.quote(feed_url) if url else ""
def ensure_preview_feed_exists() -> Reader:
    """Return the reader dependency after trying to add and update ``feed_url``.

    Both steps are best-effort: any exception raised by ``add_feed`` or
    ``update_feed`` is suppressed, so the helper always returns the reader.

    Returns:
        Reader: The reader obtained from ``get_reader_dependency``.
    """
    reader: Reader = get_reader_dependency()
    # Each step has its own suppress block so a failing add_feed
    # (presumably "feed already exists") does not skip update_feed.
    for step in (reader.add_feed, reader.update_feed):
        with contextlib.suppress(Exception):
            step(feed_url)
    return reader
def test_search() -> None:
"""Test the /search page."""
# Remove the feed if it already exists before we run the test.
@ -221,6 +230,221 @@ def test_get() -> None:
assert response.status_code == 200, f"/whitelist failed: {response.text}"
def test_blacklist_page_uses_live_preview_layout() -> None:
    """Check that the /blacklist page contains the live-preview markup."""
    ensure_preview_feed_exists()

    query: dict[str, str] = {"feed_url": encoded_feed_url(feed_url)}
    response: Response = client.get(url="/blacklist", params=query)
    assert response.status_code == 200, f"/blacklist failed: {response.text}"

    # Fragments the rendered page must contain: the preview request
    # attribute, the preview container id and the rules heading.
    html: str = response.text
    for expected in ('hx-get="/blacklist_preview"', 'id="filter-preview"', "Blacklist Rules"):
        assert expected in html
def test_whitelist_page_uses_live_preview_layout() -> None:
    """Check that the /whitelist page contains the live-preview markup."""
    ensure_preview_feed_exists()

    query: dict[str, str] = {"feed_url": encoded_feed_url(feed_url)}
    response: Response = client.get(url="/whitelist", params=query)
    assert response.status_code == 200, f"/whitelist failed: {response.text}"

    # Fragments the rendered page must contain: the preview request
    # attribute, the preview container id and the rules heading.
    html: str = response.text
    for expected in ('hx-get="/whitelist_preview"', 'id="filter-preview"', "Whitelist Rules"):
        assert expected in html
def test_blacklist_preview_does_not_persist_unsaved_rules() -> None:
    """Check that previewing a blacklist rule leaves the stored tag untouched.

    A ``blacklist_title`` tag is saved first, then the preview endpoint is
    requested with a different value. Afterwards the stored tag must still
    hold the saved value.
    """
    reader: Reader = ensure_preview_feed_exists()
    saved_rule = "saved-blacklist"
    reader.set_tag(feed_url, "blacklist_title", saved_rule)  # pyright: ignore[reportArgumentType]

    try:
        # The preview is asked to evaluate a rule that differs from the stored one.
        unsaved_query = {"feed_url": feed_url, "blacklist_title": "fvnnnfnfdnfdnfd"}
        response: Response = client.get(url="/blacklist_preview", params=unsaved_query)

        assert response.status_code == 200, f"/blacklist_preview failed: {response.text}"
        assert "Live preview" in response.text

        stored_rule = reader.get_tag(feed_url, "blacklist_title", "")
        assert stored_rule == saved_rule
    finally:
        # Best-effort cleanup so the tag does not leak into other tests.
        with contextlib.suppress(Exception):
            reader.delete_tag(feed_url, "blacklist_title")
def test_whitelist_preview_shows_precedence_over_blacklist() -> None:
    """Check that the whitelist preview reports the override of a blacklist hit.

    The rule is stored as ``blacklist_title`` and the same value is passed to
    the preview as ``whitelist_title``. The response is expected to contain
    the override reason and the "Sent" label.
    """
    reader: Reader = ensure_preview_feed_exists()
    title_rule = "fvnnnfnfdnfdnfd"
    reader.set_tag(feed_url, "blacklist_title", title_rule)  # pyright: ignore[reportArgumentType]

    try:
        preview_query = {"feed_url": feed_url, "whitelist_title": title_rule}
        response: Response = client.get(url="/whitelist_preview", params=preview_query)

        assert response.status_code == 200, f"/whitelist_preview failed: {response.text}"
        body: str = response.text
        assert "whitelist overrides blacklist" in body
        assert "Sent" in body
    finally:
        # Best-effort cleanup so the tag does not leak into other tests.
        with contextlib.suppress(Exception):
            reader.delete_tag(feed_url, "blacklist_title")
def test_blacklist_preview_uses_50_entry_limit() -> None:
    """Check that the blacklist preview requests exactly 50 entries.

    A stub reader holding 60 entries records the ``limit`` keyword passed to
    ``get_entries``. The response must report "50 checked" and the recorded
    limit must be 50.
    """

    @dataclass(slots=True)
    class DummyContent:
        """Stand-in for an entry content item; only ``value`` is provided."""

        value: str

    @dataclass(slots=True)
    class DummyFeed:
        """Stand-in for a feed with just a URL and a title."""

        url: str
        title: str

    @dataclass(slots=True)
    class DummyEntry:
        """Stand-in for a reader entry with the fields this test supplies."""

        id: str
        feed: DummyFeed
        title: str
        summary: str
        author: str
        link: str
        published: datetime | None
        content: list[DummyContent] = field(default_factory=lambda: [DummyContent("content")])

    def build_entry(owner: DummyFeed, index: int) -> Entry:
        """Create one numbered dummy entry and present it as an ``Entry`` to the type checker."""
        dummy = DummyEntry(
            id=f"entry-{index}",
            feed=owner,
            title=f"Entry {index}",
            summary=f"Summary {index}",
            author="Author",
            link=f"https://example.com/entry-{index}",
            published=datetime(2024, 1, 1, tzinfo=UTC),
        )
        return cast("Entry", dummy)

    class StubReader:
        """Minimal reader replacement that remembers the ``limit`` it was asked for."""

        def __init__(self) -> None:
            self.feed = DummyFeed(url="https://example.com/filter-preview.xml", title="Preview Feed")
            self.recorded_limit: int | None = None
            # 60 entries: more than the 50 the preview is expected to request.
            self.entries: list[Entry] = [build_entry(self.feed, index) for index in range(60)]

        def get_feed(self, _feed_url: str) -> DummyFeed:
            return self.feed

        def get_entries(self, **kwargs: object) -> list[Entry]:
            requested = kwargs.get("limit")
            if not isinstance(requested, int):
                # No usable limit was passed: record that and return everything.
                self.recorded_limit = None
                return self.entries
            self.recorded_limit = requested
            return self.entries[:requested]

        def get_tag(self, _resource: object, _key: str, default: object = None) -> object:
            # No tags are stored; always fall back to the caller's default.
            return default

    stub_reader = StubReader()
    app.dependency_overrides[get_reader_dependency] = lambda: stub_reader
    try:
        preview_query = {"feed_url": stub_reader.feed.url}
        # Rendering is patched out so only the entry selection is exercised.
        with patch("discord_rss_bot.main.create_html_for_feed", return_value="<div>Rendered</div>"):
            response: Response = client.get(url="/blacklist_preview", params=preview_query)

        assert response.status_code == 200, f"/blacklist_preview failed: {response.text}"
        assert stub_reader.recorded_limit == 50, (
            f"Expected preview to request 50 entries, got {stub_reader.recorded_limit}"
        )
        assert "50 checked" in response.text
    finally:
        # Drop the stub so later tests get the real reader dependency again.
        app.dependency_overrides = {}
def test_blacklist_preview_shows_labeled_field_values_for_substring_match() -> None:
    """Check the preview markup for an entry skipped by a substring title rule.

    A stub reader serves one entry titled "World of Warcraft" and the preview
    is requested with ``blacklist_title="orld"``. The response is expected to
    show the entry as skipped, list the labeled fields, and wrap the matched
    substring in a ``<mark>`` element.
    """

    @dataclass(slots=True)
    class DummyContent:
        """Stand-in for an entry content item; only ``value`` is provided."""

        value: str

    @dataclass(slots=True)
    class DummyFeed:
        """Stand-in for a feed with just a URL and a title."""

        url: str
        title: str

    @dataclass(slots=True)
    class DummyEntry:
        """Stand-in for a reader entry with the fields this test supplies."""

        id: str
        feed: DummyFeed
        title: str
        summary: str
        author: str
        link: str
        published: datetime | None
        content: list[DummyContent] = field(default_factory=list)

    class StubReader:
        """Minimal reader replacement that serves a single fixed entry."""

        def __init__(self) -> None:
            self.feed = DummyFeed(url="https://example.com/wow.xml", title="Warcraft Feed")
            wow_entry = DummyEntry(
                id="wow-1",
                feed=self.feed,
                title="World of Warcraft",
                summary="<p>Massive MMO news update</p>",
                author="Blizzard",
                link="https://example.com/wow-1",
                published=datetime(2024, 1, 1, tzinfo=UTC),
                content=[DummyContent("<p>The expansion launches soon.</p>")],
            )
            self.entries: list[Entry] = [cast("Entry", wow_entry)]

        def get_feed(self, _feed_url: str) -> DummyFeed:
            return self.feed

        def get_entries(self, **_kwargs: object) -> list[Entry]:
            # Any filtering or limit keyword is ignored; the one entry is always returned.
            return self.entries

        def get_tag(self, _resource: object, _key: str, default: object = None) -> object:
            # No tags are stored; always fall back to the caller's default.
            return default

    stub_reader = StubReader()
    app.dependency_overrides[get_reader_dependency] = lambda: stub_reader
    try:
        preview_query = {"feed_url": stub_reader.feed.url, "blacklist_title": "orld"}
        # Rendering of the entry body is patched out with a fixed snippet.
        with patch("discord_rss_bot.main.create_html_for_feed", return_value="<div>Rendered</div>"):
            response: Response = client.get(url="/blacklist_preview", params=preview_query)

        assert response.status_code == 200, f"/blacklist_preview failed: {response.text}"

        # Checked in order: status label, entry title, field labels, CSS hooks,
        # the highlighted match, then the summary and content text.
        expected_fragments = (
            "Skipped",
            "World of Warcraft",
            "Title",
            "Author",
            "Description",
            "Content",
            "filter-preview__field-row",
            "filter-preview__match",
            '<mark class="filter-preview__match filter-preview__match--danger">orld</mark>',
            "Massive MMO news update",
            "The expansion launches soon.",
        )
        html: str = response.text
        for fragment in expected_fragments:
            assert fragment in html
    finally:
        # Drop the stub so later tests get the real reader dependency again.
        app.dependency_overrides = {}
def test_settings_page_shows_screenshot_layout_setting() -> None:
response: Response = client.get(url="/settings")
assert response.status_code == 200, f"/settings failed: {response.text}"

View file

@ -12,6 +12,8 @@ def test_is_word_in_text() -> None:
assert is_word_in_text("word1,word2", "This is a sample text containing word1.") is True, msg_true
assert is_word_in_text("word1,word2", "This is a sample text containing word2.") is True, msg_true
assert is_word_in_text("word1,word2", "This is a sample text containing WORD1 and WORD2.") is True, msg_true
assert is_word_in_text("orld", "World of Warcraft") is True, msg_true
assert is_word_in_text(" orld , craft ", "World of Warcraft") is True, msg_true
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false

View file

@ -9,6 +9,8 @@ from reader import Feed
from reader import Reader
from reader import make_reader
from discord_rss_bot.filter.evaluator import evaluate_entry_filters
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.filter.whitelist import has_white_tags
from discord_rss_bot.filter.whitelist import should_be_sent
@ -184,3 +186,54 @@ def test_regex_should_be_sent() -> None:
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns"
reader.delete_tag(feed, "regex_whitelist_author")
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
def test_active_whitelist_blocks_non_matching_blacklisted_entry() -> None:
    """Check that a whitelist which misses keeps an entry from being sent.

    A ``blacklist_title`` rule and a ``whitelist_title`` rule with a different
    value are both set. The decision is expected to report the blacklist
    match, no whitelist match, and ``should_send`` false.
    """
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry of the feed is evaluated.
    head: Entry | None = next(iter(reader.get_entries(feed=feed)), None)
    first_entry: list[Entry] = [] if head is None else [head]
    assert len(first_entry) == 1, "First entry should be added"

    title_rules = {
        "blacklist_title": "fvnnnfnfdnfdnfd",
        "whitelist_title": "does-not-match",
    }
    for tag_name, rule in title_rules.items():
        reader.set_tag(feed, tag_name, rule)  # pyright: ignore[reportArgumentType]

    blacklist_rules = get_filter_values_from_reader(reader, feed, "blacklist")
    whitelist_rules = get_filter_values_from_reader(reader, feed, "whitelist")
    decision = evaluate_entry_filters(
        first_entry[0],
        blacklist_values=blacklist_rules,
        whitelist_values=whitelist_rules,
    )

    assert decision.should_send is False, "Entry should be skipped when whitelist is active but does not match"
    assert decision.blacklist_match is not None, "Expected a blacklist match"
    assert decision.whitelist_match is None, "Expected whitelist to miss"
    assert "no whitelist rule matched" in decision.reason
def test_whitelist_substring_match_on_title() -> None:
    """Check that a plain-text ``whitelist_title`` rule matches inside a title.

    The rule ``"vnnnfn"`` is presumably a fragment of the first entry's title
    in the test feed; the entry is expected to be sent once it is set.
    """
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry of the feed is evaluated.
    head: Entry | None = next(iter(reader.get_entries(feed=feed)), None)
    first_entry: list[Entry] = [] if head is None else [head]
    assert len(first_entry) == 1, "First entry should be added"

    reader.set_tag(feed, "whitelist_title", "vnnnfn")  # pyright: ignore[reportArgumentType]
    sent = should_be_sent(reader, first_entry[0])
    assert sent is True, "Substring title match should whitelist the entry"