discord-rss-bot/tests/test_whitelist.py
Joakim Hellsén d85bc16904
All checks were successful
Test and build Docker image / docker (push) Successful in 30s
Make blacklist override whitelist
Change filter evaluation so blacklist matches take precedence over whitelist matches. Updated evaluator logic to skip entries when blacklist and whitelist both match, adjusted related branches to reflect the new decision flow, and updated a feeds.py comment to clarify the combined decision. Also updated blacklist/whitelist templates copy to reflect the new precedence and adjusted tests to expect blacklist-wins behavior.
2026-05-04 22:55:53 +02:00

239 lines
11 KiB
Python

from __future__ import annotations
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING
from reader import Entry
from reader import Feed
from reader import Reader
from reader import make_reader
from discord_rss_bot.filter.evaluator import evaluate_entry_filters
from discord_rss_bot.filter.evaluator import get_filter_values_from_reader
from discord_rss_bot.filter.whitelist import has_white_tags
from discord_rss_bot.filter.whitelist import should_be_sent
if TYPE_CHECKING:
from collections.abc import Iterable
# URL of the feed that the tests below add to their reader before updating it.
# NOTE(review): reader.update_feeds() presumably fetches this URL over the network — confirm the tests need connectivity.
feed_url: str = "https://lovinator.space/rss_test.xml"
# Create the database
def get_reader() -> Reader:
    """Build a reader backed by a new SQLite file in a fresh temporary directory.

    The directory comes from ``tempfile.mkdtemp()`` and is not removed by this helper.

    Returns:
        Reader: A reader whose database file is ``test.sqlite`` inside that directory.
    """
    database_file: Path = Path(tempfile.mkdtemp()) / "test.sqlite"
    return make_reader(url=str(database_file))
def test_has_white_tags() -> None:
    """Check that ``has_white_tags`` reacts to each plain and regex whitelist tag."""
    reader: Reader = get_reader()

    # Add feed and update entries
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Test feed without any whitelist tags.
    # Query the reader that actually holds the feed: calling get_reader() again here
    # would create a second, unrelated empty database and make this check vacuous.
    assert has_white_tags(reader=reader, feed=feed) is False, "Feed should not have any whitelist tags"

    plain_tags: tuple[str, ...] = (
        "whitelist_title",
        "whitelist_summary",
        "whitelist_content",
        "whitelist_author",
    )
    regex_tags: tuple[str, ...] = (
        "regex_whitelist_title",
        "regex_whitelist_summary",
        "regex_whitelist_content",
        "regex_whitelist_author",
    )

    # Each tag is set and removed on its own, plain tags first and regex tags after.
    for tag_name in (*plain_tags, *regex_tags):
        check_if_has_tag(reader, feed, tag_name)

    # Clean up
    reader.delete_feed(feed_url)
def check_if_has_tag(reader: Reader, feed: Feed, whitelist_name: str) -> None:
    """Assert that ``has_white_tags`` follows one tag being set and then removed.

    Args:
        reader: Reader used to set and delete the tag.
        feed: Feed the tag is attached to.
        whitelist_name: Name of the tag that is set to ``"a"`` and then deleted.
    """
    reader.set_tag(feed, whitelist_name, "a")  # pyright: ignore[reportArgumentType]
    detected_with_tag = has_white_tags(reader=reader, feed=feed)
    assert detected_with_tag is True, "Feed should have whitelist tags"

    reader.delete_tag(feed, whitelist_name)
    detected_without_tag = has_white_tags(reader=reader, feed=feed)
    assert detected_without_tag is False, "Feed should not have any whitelist tags"
def test_should_be_sent() -> None:
    """Exercise ``should_be_sent`` with plain-text whitelist tags on each field."""
    reader: Reader = get_reader()

    # Register the feed and pull its entries.
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry returned for the feed is used.
    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    assert entries is not None, "Entries should not be None"
    entry: Entry | None = next(iter(entries), None)
    assert entry is not None, "First entry should be added"

    # With no whitelist tag set, the entry is expected not to be sent.
    assert should_be_sent(reader, entry) is False, "Entry should not be sent"

    # (tag name, value the test expects to whitelist the entry)
    cases: tuple[tuple[str, str], ...] = (
        ("whitelist_title", "fvnnnfnfdnfdnfd"),
        ("whitelist_summary", "ffdnfdnfdnfdnfdndfn"),
        ("whitelist_content", "ffdnfdnfdnfdnfdndfn"),
        ("whitelist_author", "TheLovinator"),
    )

    for tag_name, matching_value in cases:
        # A matching value sends the entry; removing the tag stops it again.
        reader.set_tag(feed, tag_name, matching_value)  # pyright: ignore[reportArgumentType]
        assert should_be_sent(reader, entry) is True, "Entry should be sent"
        reader.delete_tag(feed, tag_name)
        assert should_be_sent(reader, entry) is False, "Entry should not be sent"

        # "åäö" is expected not to match, so the entry stays unsent before and after removal.
        reader.set_tag(feed, tag_name, "åäö")  # pyright: ignore[reportArgumentType]
        assert should_be_sent(reader, entry) is False, "Entry should not be sent"
        reader.delete_tag(feed, tag_name)
        assert should_be_sent(reader, entry) is False, "Entry should not be sent"
def test_regex_should_be_sent() -> None:
    """Exercise ``should_be_sent`` with regex whitelist tags on each field."""
    reader: Reader = get_reader()

    # Register the feed and pull its entries.
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry returned for the feed is used.
    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    assert entries is not None, "Entries should not be None"
    entry: Entry | None = next(iter(entries), None)
    assert entry is not None, "First entry should be added"

    # With no regex whitelist tag set, the entry is expected not to be sent.
    assert should_be_sent(reader, entry) is False, "Entry should not be sent"

    # (tag name, pattern the test expects to match, assertion message for the match)
    per_field_cases: tuple[tuple[str, str, str], ...] = (
        ("regex_whitelist_title", r"fvnnn\w+", "Entry should be sent with regex title match"),
        ("regex_whitelist_summary", r"ffdnfdn\w+", "Entry should be sent with regex summary match"),
        ("regex_whitelist_content", r"ffdnfdnfdn\w+", "Entry should be sent with regex content match"),
        ("regex_whitelist_author", r"TheLovinator\d*", "Entry should be sent with regex author match"),
    )
    for tag_name, pattern, match_message in per_field_cases:
        reader.set_tag(feed, tag_name, pattern)  # pyright: ignore[reportArgumentType]
        assert should_be_sent(reader, entry) is True, match_message
        reader.delete_tag(feed, tag_name)
        assert should_be_sent(reader, entry) is False, "Entry should not be sent"

    # An invalid pattern must not raise; the entry is simply not sent.
    reader.set_tag(feed, "regex_whitelist_title", r"[incomplete")  # pyright: ignore[reportArgumentType]
    assert should_be_sent(reader, entry) is False, "Entry should not be sent with invalid regex"
    reader.delete_tag(feed, "regex_whitelist_title")

    # Several patterns in one tag value, separated by commas.
    comma_patterns = r"pattern1,TheLovinator\d*,pattern3"
    reader.set_tag(feed, "regex_whitelist_author", comma_patterns)  # pyright: ignore[reportArgumentType]
    assert should_be_sent(reader, entry) is True, "Entry should be sent with one matching pattern in list"
    reader.delete_tag(feed, "regex_whitelist_author")
    assert should_be_sent(reader, entry) is False, "Entry should not be sent"

    # Several patterns in one tag value, separated by newlines.
    newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
    reader.set_tag(feed, "regex_whitelist_author", newline_patterns)  # pyright: ignore[reportArgumentType]
    assert should_be_sent(reader, entry) is True, "Entry should be sent with newline-separated patterns"
    reader.delete_tag(feed, "regex_whitelist_author")
    assert should_be_sent(reader, entry) is False, "Entry should not be sent"
def test_blacklist_blocks_when_active_whitelist_misses() -> None:
    """A matching blacklist must skip the entry when the configured whitelist misses."""
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry returned for the feed is used.
    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    entry: Entry | None = next(iter(entries), None)
    assert entry is not None, "First entry should be added"

    # The blacklist value is expected to match the title; the whitelist value is not.
    reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd")  # pyright: ignore[reportArgumentType]
    reader.set_tag(feed, "whitelist_title", "does-not-match")  # pyright: ignore[reportArgumentType]

    blacklist_values = get_filter_values_from_reader(reader, feed, "blacklist")
    whitelist_values = get_filter_values_from_reader(reader, feed, "whitelist")
    decision = evaluate_entry_filters(
        entry,
        blacklist_values=blacklist_values,
        whitelist_values=whitelist_values,
    )

    assert decision.should_send is False, "Entry should be skipped when blacklist matches"
    assert decision.blacklist_match is not None, "Expected a blacklist match"
    assert decision.whitelist_match is None, "Expected whitelist to miss"
    assert "blacklist text match on title" in decision.reason
def test_whitelist_substring_match_on_title() -> None:
    """A plain-text whitelist value that is a substring of the title whitelists the entry."""
    reader: Reader = get_reader()
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Only the first entry returned for the feed is used.
    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    entry: Entry | None = next(iter(entries), None)
    assert entry is not None, "First entry should be added"

    reader.set_tag(feed, "whitelist_title", "vnnnfn")  # pyright: ignore[reportArgumentType]
    sent = should_be_sent(reader, entry)
    assert sent is True, "Substring title match should whitelist the entry"