Add regex support to blacklist and whitelist filters. Strong code, many bananas! 🦍🦍🦍🦍
This commit is contained in:
@ -39,6 +39,13 @@ def test_has_black_tags() -> None:
|
||||
check_if_has_tag(reader, feed, "blacklist_title")
|
||||
check_if_has_tag(reader, feed, "blacklist_summary")
|
||||
check_if_has_tag(reader, feed, "blacklist_content")
|
||||
check_if_has_tag(reader, feed, "blacklist_author")
|
||||
|
||||
# Test regex blacklist tags
|
||||
check_if_has_tag(reader, feed, "regex_blacklist_title")
|
||||
check_if_has_tag(reader, feed, "regex_blacklist_summary")
|
||||
check_if_has_tag(reader, feed, "regex_blacklist_content")
|
||||
check_if_has_tag(reader, feed, "regex_blacklist_author")
|
||||
|
||||
# Clean up
|
||||
reader.delete_feed(feed_url)
|
||||
@ -74,6 +81,7 @@ def test_should_be_skipped() -> None:
|
||||
# Test entry without any blacklists
|
||||
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||
|
||||
# Test standard blacklist functionality
|
||||
reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd") # pyright: ignore[reportArgumentType]
|
||||
assert entry_should_be_skipped(reader, first_entry[0]) is True, f"Entry should be skipped: {first_entry[0]}"
|
||||
reader.delete_tag(feed, "blacklist_title")
|
||||
@ -113,3 +121,81 @@ def test_should_be_skipped() -> None:
|
||||
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||
reader.delete_tag(feed, "blacklist_author")
|
||||
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||
|
||||
|
||||
def test_regex_should_be_skipped() -> None:
    """Exercise the ``regex_blacklist_*`` feed tags through ``entry_should_be_skipped``.

    Each scenario sets one regex blacklist tag on the feed, checks the verdict for the
    first entry of the feed, and removes the tag again before the next scenario runs.
    """
    reader: Reader = get_reader()

    # Register the test feed and pull its entries into the reader.
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Keep at most one entry; the list stays empty when the feed produced nothing.
    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    assert entries is not None, f"Entries should not be None: {entries}"
    candidate = next(iter(entries), None)
    first_entry: list[Entry] = [] if candidate is None else [candidate]
    assert len(first_entry) == 1, f"First entry should be added: {first_entry}"
    entry: Entry = first_entry[0]

    # With no regex blacklist tag set, the entry is not skipped.
    assert entry_should_be_skipped(reader, entry) is False, f"Entry should not be skipped: {entry}"

    def assert_skipped_only_while_tagged(tag_name: str, pattern: str, wording: str) -> None:
        """Set the tag, expect a skip, delete the tag, expect no skip."""
        reader.set_tag(feed, tag_name, pattern)  # pyright: ignore[reportArgumentType]
        assert entry_should_be_skipped(reader, entry) is True, f"Entry should be skipped with {wording}: {entry}"
        reader.delete_tag(feed, tag_name)
        assert entry_should_be_skipped(reader, entry) is False, f"Entry should not be skipped: {entry}"

    # One pattern per entry field: (tag name, pattern, wording for the failure message).
    single_pattern_cases = (
        ("regex_blacklist_title", r"fvnnn\w+", "regex title match"),
        ("regex_blacklist_summary", r"ffdnfdn\w+", "regex summary match"),
        ("regex_blacklist_content", r"ffdnfdnfdn\w+", "regex content match"),
        ("regex_blacklist_author", r"TheLovinator\d*", "regex author match"),
    )
    for tag_name, pattern, wording in single_pattern_cases:
        assert_skipped_only_while_tagged(tag_name, pattern, wording)

    # An invalid pattern must neither raise nor cause the entry to be skipped.
    reader.set_tag(feed, "regex_blacklist_title", r"[incomplete")  # pyright: ignore[reportArgumentType]
    assert entry_should_be_skipped(reader, entry) is False, (
        f"Entry should not be skipped with invalid regex: {entry}"
    )
    reader.delete_tag(feed, "regex_blacklist_title")

    # Several patterns stored in one tag: comma-separated first, newline-separated second.
    newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
    multi_pattern_cases = (
        ("regex_blacklist_author", r"pattern1,TheLovinator\d*,pattern3", "one matching pattern in list"),
        ("regex_blacklist_author", newline_patterns, "newline-separated patterns"),
    )
    for tag_name, pattern, wording in multi_pattern_cases:
        assert_skipped_only_while_tagged(tag_name, pattern, wording)
|
||||
|
@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from discord_rss_bot.filter.utils import is_word_in_text
|
||||
from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
|
||||
|
||||
|
||||
def test_is_word_in_text() -> None:
|
||||
@ -14,3 +14,51 @@ def test_is_word_in_text() -> None:
|
||||
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
|
||||
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
|
||||
assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false
|
||||
|
||||
|
||||
def test_is_regex_match() -> None:
    """Run ``is_regex_match`` over a table of (patterns, text, expected result) cases."""
    msg_true = "Should return True"
    msg_false = "Should return False"

    # Multi-line pattern strings shared by several cases below.
    newline_patterns = "pattern1\n^start\ncontains\\d+"
    mixed_patterns = "pattern1\npattern2,pattern3\npattern4"
    whitespace_patterns = "\\s+\n \n\npattern\n\n"

    cases: tuple[tuple[str, str, bool], ...] = (
        # Basic regex patterns
        (r"word\d+", "This text contains word123", True),
        (r"^Hello", "Hello world", True),
        (r"world$", "Hello world", True),
        # Case insensitivity
        (r"hello", "This text contains HELLO", True),
        # Comma-separated patterns
        (r"pattern1,pattern2", "This contains pattern2", True),
        (r"pattern1, pattern2", "This contains pattern1", True),
        # Patterns that should not match
        (r"^start", "This doesn't start with the pattern", False),
        (r"end$", "This doesn't end with the pattern", False),
        # Empty pattern string or empty text
        ("", "Some text", False),
        ("pattern", "", False),
        # Invalid regex: expected to return False instead of raising
        (r"[incomplete", "Some text", False),
        # Several patterns of which one is invalid
        (r"valid, [invalid, \w+", "Contains word", True),
        # Newline-separated patterns
        (newline_patterns, "This contains123 text", True),
        (newline_patterns, "start of line", True),
        (newline_patterns, "pattern1 is here", True),
        (newline_patterns, "None of these match", False),
        # Newline and comma separators mixed (for backward compatibility)
        (mixed_patterns, "Contains pattern3", True),
        (mixed_patterns, "Contains pattern4", True),
        # Empty lines and whitespace-only lines between patterns
        (whitespace_patterns, "text with spaces", True),
        (whitespace_patterns, "text with pattern", True),
    )

    for patterns, text, expected in cases:
        assert is_regex_match(patterns, text) is expected, (msg_true if expected else msg_false)
|
||||
|
@ -38,6 +38,13 @@ def test_has_white_tags() -> None:
|
||||
check_if_has_tag(reader, feed, "whitelist_title")
|
||||
check_if_has_tag(reader, feed, "whitelist_summary")
|
||||
check_if_has_tag(reader, feed, "whitelist_content")
|
||||
check_if_has_tag(reader, feed, "whitelist_author")
|
||||
|
||||
# Test regex whitelist tags
|
||||
check_if_has_tag(reader, feed, "regex_whitelist_title")
|
||||
check_if_has_tag(reader, feed, "regex_whitelist_summary")
|
||||
check_if_has_tag(reader, feed, "regex_whitelist_content")
|
||||
check_if_has_tag(reader, feed, "regex_whitelist_author")
|
||||
|
||||
# Clean up
|
||||
reader.delete_feed(feed_url)
|
||||
@ -109,3 +116,67 @@ def test_should_be_sent() -> None:
|
||||
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||
reader.delete_tag(feed, "whitelist_author")
|
||||
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||
|
||||
|
||||
def test_regex_should_be_sent() -> None:
    """Exercise the ``regex_whitelist_*`` feed tags through ``should_be_sent``.

    Each scenario sets one regex whitelist tag on the feed, checks the verdict for the
    first entry of the feed, and removes the tag again before the next scenario runs.
    """
    reader: Reader = get_reader()

    # Register the test feed and pull its entries into the reader.
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    # Keep at most one entry; the list stays empty when the feed produced nothing.
    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    assert entries is not None, "Entries should not be None"
    candidate = next(iter(entries), None)
    first_entry: list[Entry] = [] if candidate is None else [candidate]
    assert len(first_entry) == 1, "First entry should be added"
    entry: Entry = first_entry[0]

    # With no regex whitelist tag set, the entry is not sent.
    assert should_be_sent(reader, entry) is False, "Entry should not be sent"

    def assert_sent_only_while_tagged(tag_name: str, pattern: str, failure_message: str) -> None:
        """Set the tag, expect the entry to be sent, delete the tag, expect it not to be sent."""
        reader.set_tag(feed, tag_name, pattern)  # pyright: ignore[reportArgumentType]
        assert should_be_sent(reader, entry) is True, failure_message
        reader.delete_tag(feed, tag_name)
        assert should_be_sent(reader, entry) is False, "Entry should not be sent"

    # One pattern per entry field: (tag name, pattern, message shown when the match is missed).
    single_pattern_cases = (
        ("regex_whitelist_title", r"fvnnn\w+", "Entry should be sent with regex title match"),
        ("regex_whitelist_summary", r"ffdnfdn\w+", "Entry should be sent with regex summary match"),
        ("regex_whitelist_content", r"ffdnfdnfdn\w+", "Entry should be sent with regex content match"),
        ("regex_whitelist_author", r"TheLovinator\d*", "Entry should be sent with regex author match"),
    )
    for tag_name, pattern, failure_message in single_pattern_cases:
        assert_sent_only_while_tagged(tag_name, pattern, failure_message)

    # An invalid pattern must neither raise nor cause the entry to be sent.
    reader.set_tag(feed, "regex_whitelist_title", r"[incomplete")  # pyright: ignore[reportArgumentType]
    assert should_be_sent(reader, entry) is False, "Entry should not be sent with invalid regex"
    reader.delete_tag(feed, "regex_whitelist_title")

    # Several patterns stored in one tag: comma-separated first, newline-separated second.
    newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
    multi_pattern_cases = (
        (
            "regex_whitelist_author",
            r"pattern1,TheLovinator\d*,pattern3",
            "Entry should be sent with one matching pattern in list",
        ),
        (
            "regex_whitelist_author",
            newline_patterns,
            "Entry should be sent with newline-separated patterns",
        ),
    )
    for tag_name, pattern, failure_message in multi_pattern_cases:
        assert_sent_only_while_tagged(tag_name, pattern, failure_message)
|
||||
|
Reference in New Issue
Block a user