Add regex support to blacklist and whitelist filters. Strong code, many bananas! 🦍🦍🦍🦍

This commit is contained in:
2025-04-03 05:44:50 +02:00
parent 84e39c9f79
commit ac63041b28
11 changed files with 526 additions and 39 deletions

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from discord_rss_bot.filter.utils import is_word_in_text
from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
def test_is_word_in_text() -> None:
@@ -14,3 +14,51 @@ def test_is_word_in_text() -> None:
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false
def test_is_regex_match() -> None:
    """Check is_regex_match against a table of (patterns, text, expected) cases.

    The table covers single patterns, comma-separated patterns,
    newline-separated patterns, mixed separators, empty input, invalid
    regexes, and pattern lists containing blank lines. Cases are evaluated
    in the order listed, and each one asserts the exact boolean returned.
    """
    msg_true = "Should return True"
    msg_false = "Should return False"

    # Multi-line pattern lists shared by several cases below.
    newline_patterns = "pattern1\n^start\ncontains\\d+"
    mixed_patterns = "pattern1\npattern2,pattern3\npattern4"
    whitespace_patterns = "\\s+\n \n\npattern\n\n"

    cases: list[tuple[str, str, bool]] = [
        # Basic regex patterns
        (r"word\d+", "This text contains word123", True),
        (r"^Hello", "Hello world", True),
        (r"world$", "Hello world", True),
        # Case insensitivity
        (r"hello", "This text contains HELLO", True),
        # Comma-separated patterns (with and without a space after the comma)
        (r"pattern1,pattern2", "This contains pattern2", True),
        (r"pattern1, pattern2", "This contains pattern1", True),
        # Anchored patterns that should not match
        (r"^start", "This doesn't start with the pattern", False),
        (r"end$", "This doesn't end with the pattern", False),
        # Empty pattern or empty text
        ("", "Some text", False),
        ("pattern", "", False),
        # Invalid regex: expected to yield False rather than raise
        (r"[incomplete", "Some text", False),
        # One invalid pattern among valid ones must not block a match
        (r"valid, [invalid, \w+", "Contains word", True),
        # Newline-separated patterns
        (newline_patterns, "This contains123 text", True),
        (newline_patterns, "start of line", True),
        (newline_patterns, "pattern1 is here", True),
        (newline_patterns, "None of these match", False),
        # Mixed newline and comma separators (backward compatibility)
        (mixed_patterns, "Contains pattern3", True),
        (mixed_patterns, "Contains pattern4", True),
        # Pattern lists containing empty lines and whitespace-only lines
        (whitespace_patterns, "text with spaces", True),
        (whitespace_patterns, "text with pattern", True),
    ]

    for patterns, text, expected in cases:
        # Same failure message the expectation would have had as a standalone assert.
        failure_msg = msg_true if expected else msg_false
        assert is_regex_match(patterns, text) is expected, failure_msg