Add regex support to blacklist and whitelist filters. Strong code, many bananas! 🦍🦍🦍🦍
This commit is contained in:
@ -38,7 +38,7 @@ repos:
|
|||||||
|
|
||||||
# An extremely fast Python linter and formatter.
|
# An extremely fast Python linter and formatter.
|
||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
rev: v0.9.5
|
rev: v0.11.2
|
||||||
hooks:
|
hooks:
|
||||||
- id: ruff-format
|
- id: ruff-format
|
||||||
- id: ruff
|
- id: ruff
|
||||||
|
@ -2,7 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from discord_rss_bot.filter.utils import is_word_in_text
|
from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from reader import Entry, Feed, Reader
|
from reader import Entry, Feed, Reader
|
||||||
@ -12,9 +12,14 @@ def feed_has_blacklist_tags(custom_reader: Reader, feed: Feed) -> bool:
|
|||||||
"""Return True if the feed has blacklist tags.
|
"""Return True if the feed has blacklist tags.
|
||||||
|
|
||||||
The following tags are checked:
|
The following tags are checked:
|
||||||
- blacklist_title
|
- blacklist_author
|
||||||
|
- blacklist_content
|
||||||
- blacklist_summary
|
- blacklist_summary
|
||||||
- blacklist_content.
|
- blacklist_title
|
||||||
|
- regex_blacklist_author
|
||||||
|
- regex_blacklist_content
|
||||||
|
- regex_blacklist_summary
|
||||||
|
- regex_blacklist_title
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
custom_reader: The reader.
|
custom_reader: The reader.
|
||||||
@ -23,14 +28,29 @@ def feed_has_blacklist_tags(custom_reader: Reader, feed: Feed) -> bool:
|
|||||||
Returns:
|
Returns:
|
||||||
bool: If the feed has any of the tags.
|
bool: If the feed has any of the tags.
|
||||||
"""
|
"""
|
||||||
blacklist_title: str = str(custom_reader.get_tag(feed, "blacklist_title", ""))
|
blacklist_author: str = str(custom_reader.get_tag(feed, "blacklist_author", "")).strip()
|
||||||
blacklist_summary: str = str(custom_reader.get_tag(feed, "blacklist_summary", ""))
|
blacklist_content: str = str(custom_reader.get_tag(feed, "blacklist_content", "")).strip()
|
||||||
blacklist_content: str = str(custom_reader.get_tag(feed, "blacklist_content", ""))
|
blacklist_summary: str = str(custom_reader.get_tag(feed, "blacklist_summary", "")).strip()
|
||||||
|
blacklist_title: str = str(custom_reader.get_tag(feed, "blacklist_title", "")).strip()
|
||||||
|
|
||||||
return bool(blacklist_title or blacklist_summary or blacklist_content)
|
regex_blacklist_author: str = str(custom_reader.get_tag(feed, "regex_blacklist_author", "")).strip()
|
||||||
|
regex_blacklist_content: str = str(custom_reader.get_tag(feed, "regex_blacklist_content", "")).strip()
|
||||||
|
regex_blacklist_summary: str = str(custom_reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
|
||||||
|
regex_blacklist_title: str = str(custom_reader.get_tag(feed, "regex_blacklist_title", "")).strip()
|
||||||
|
|
||||||
|
return bool(
|
||||||
|
blacklist_title
|
||||||
|
or blacklist_author
|
||||||
|
or blacklist_content
|
||||||
|
or blacklist_summary
|
||||||
|
or regex_blacklist_author
|
||||||
|
or regex_blacklist_content
|
||||||
|
or regex_blacklist_summary
|
||||||
|
or regex_blacklist_title,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def entry_should_be_skipped(custom_reader: Reader, entry: Entry) -> bool:
|
def entry_should_be_skipped(custom_reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
|
||||||
"""Return True if the entry is in the blacklist.
|
"""Return True if the entry is in the blacklist.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -40,21 +60,58 @@ def entry_should_be_skipped(custom_reader: Reader, entry: Entry) -> bool:
|
|||||||
Returns:
|
Returns:
|
||||||
bool: If the entry is in the blacklist.
|
bool: If the entry is in the blacklist.
|
||||||
"""
|
"""
|
||||||
blacklist_title: str = str(custom_reader.get_tag(entry.feed, "blacklist_title", ""))
|
feed = entry.feed
|
||||||
blacklist_summary: str = str(custom_reader.get_tag(entry.feed, "blacklist_summary", ""))
|
|
||||||
blacklist_content: str = str(custom_reader.get_tag(entry.feed, "blacklist_content", ""))
|
blacklist_title: str = str(custom_reader.get_tag(feed, "blacklist_title", "")).strip()
|
||||||
blacklist_author: str = str(custom_reader.get_tag(entry.feed, "blacklist_author", ""))
|
blacklist_summary: str = str(custom_reader.get_tag(feed, "blacklist_summary", "")).strip()
|
||||||
|
blacklist_content: str = str(custom_reader.get_tag(feed, "blacklist_content", "")).strip()
|
||||||
|
blacklist_author: str = str(custom_reader.get_tag(feed, "blacklist_author", "")).strip()
|
||||||
|
|
||||||
|
regex_blacklist_title: str = str(custom_reader.get_tag(feed, "regex_blacklist_title", "")).strip()
|
||||||
|
regex_blacklist_summary: str = str(custom_reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
|
||||||
|
regex_blacklist_content: str = str(custom_reader.get_tag(feed, "regex_blacklist_content", "")).strip()
|
||||||
|
regex_blacklist_author: str = str(custom_reader.get_tag(feed, "regex_blacklist_author", "")).strip()
|
||||||
# TODO(TheLovinator): Also add support for entry_text and more.
|
# TODO(TheLovinator): Also add support for entry_text and more.
|
||||||
|
|
||||||
|
# Check regular blacklist
|
||||||
if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title):
|
if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title):
|
||||||
return True
|
return True
|
||||||
if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary):
|
if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary):
|
||||||
return True
|
return True
|
||||||
|
if (
|
||||||
|
entry.content
|
||||||
|
and entry.content[0].value
|
||||||
|
and blacklist_content
|
||||||
|
and is_word_in_text(blacklist_content, entry.content[0].value)
|
||||||
|
):
|
||||||
|
return True
|
||||||
if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author):
|
if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author):
|
||||||
return True
|
return True
|
||||||
|
if (
|
||||||
|
entry.content
|
||||||
|
and entry.content[0].value
|
||||||
|
and blacklist_content
|
||||||
|
and is_word_in_text(blacklist_content, entry.content[0].value)
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Check regex blacklist
|
||||||
|
if entry.title and regex_blacklist_title and is_regex_match(regex_blacklist_title, entry.title):
|
||||||
|
return True
|
||||||
|
if entry.summary and regex_blacklist_summary and is_regex_match(regex_blacklist_summary, entry.summary):
|
||||||
|
return True
|
||||||
|
if (
|
||||||
|
entry.content
|
||||||
|
and entry.content[0].value
|
||||||
|
and regex_blacklist_content
|
||||||
|
and is_regex_match(regex_blacklist_content, entry.content[0].value)
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
if entry.author and regex_blacklist_author and is_regex_match(regex_blacklist_author, entry.author):
|
||||||
|
return True
|
||||||
return bool(
|
return bool(
|
||||||
entry.content
|
entry.content
|
||||||
and entry.content[0].value
|
and entry.content[0].value
|
||||||
and blacklist_content
|
and regex_blacklist_content
|
||||||
and is_word_in_text(blacklist_content, entry.content[0].value),
|
and is_regex_match(regex_blacklist_content, entry.content[0].value),
|
||||||
)
|
)
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
logger: logging.Logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def is_word_in_text(word_string: str, text: str) -> bool:
|
def is_word_in_text(word_string: str, text: str) -> bool:
|
||||||
"""Check if any of the words are in the text.
|
"""Check if any of the words are in the text.
|
||||||
@ -20,3 +23,50 @@ def is_word_in_text(word_string: str, text: str) -> bool:
|
|||||||
|
|
||||||
# Check if any pattern matches the text.
|
# Check if any pattern matches the text.
|
||||||
return any(pattern.search(text) for pattern in patterns)
|
return any(pattern.search(text) for pattern in patterns)
|
||||||
|
|
||||||
|
|
||||||
|
def is_regex_match(regex_string: str, text: str) -> bool:
    """Check if any of the regex patterns match the text.

    Every non-empty line of ``regex_string`` is one pattern. A line that
    contains commas is first tried exactly as written, so that a regex which
    legitimately contains a comma (for example the quantifier ``a{1,3}``) is
    not broken apart, and is then also split on commas for backward
    compatibility with comma-separated lists.

    Patterns are compiled with ``re.IGNORECASE`` and applied with ``search``,
    so they may match anywhere in the text. Patterns that fail to compile are
    logged as warnings and skipped.

    Args:
        regex_string: A string containing regex patterns, separated by newlines or commas.
        text: The text to search in.

    Returns:
        bool: True if any regex pattern matches the text, otherwise False.
    """
    if not regex_string or not text:
        return False

    regex_list: list[str] = []
    for line in regex_string.split("\n"):
        stripped_line: str = line.strip()
        if not stripped_line:
            continue

        # Always try the line as the user wrote it. Splitting on commas alone
        # would turn "a{1,3}" into the unrelated patterns "a{1" and "3}".
        regex_list.append(stripped_line)

        # For backward compatibility, also accept comma-separated patterns.
        if "," in stripped_line:
            regex_list.extend(part.strip() for part in stripped_line.split(",") if part.strip())

    for pattern_str in regex_list:
        # Only compilation can raise re.error, so keep the try body minimal.
        try:
            pattern: re.Pattern[str] = re.compile(pattern_str, re.IGNORECASE)
        except re.error:
            logger.warning("Invalid regex pattern: %s", pattern_str)
            continue

        if pattern.search(text):
            logger.info("Regex pattern matched: %s", pattern_str)
            return True

    logger.info("No regex patterns matched.")
    return False
|
||||||
|
@ -2,7 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from discord_rss_bot.filter.utils import is_word_in_text
|
from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from reader import Entry, Feed, Reader
|
from reader import Entry, Feed, Reader
|
||||||
@ -12,9 +12,14 @@ def has_white_tags(custom_reader: Reader, feed: Feed) -> bool:
|
|||||||
"""Return True if the feed has whitelist tags.
|
"""Return True if the feed has whitelist tags.
|
||||||
|
|
||||||
The following tags are checked:
|
The following tags are checked:
|
||||||
- whitelist_title
|
- regex_whitelist_author
|
||||||
|
- regex_whitelist_content
|
||||||
|
- regex_whitelist_summary
|
||||||
|
- regex_whitelist_title
|
||||||
|
- whitelist_author
|
||||||
|
- whitelist_content
|
||||||
- whitelist_summary
|
- whitelist_summary
|
||||||
- whitelist_content.
|
- whitelist_title
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
custom_reader: The reader.
|
custom_reader: The reader.
|
||||||
@ -23,14 +28,29 @@ def has_white_tags(custom_reader: Reader, feed: Feed) -> bool:
|
|||||||
Returns:
|
Returns:
|
||||||
bool: If the feed has any of the tags.
|
bool: If the feed has any of the tags.
|
||||||
"""
|
"""
|
||||||
whitelist_title: str = str(custom_reader.get_tag(feed, "whitelist_title", ""))
|
whitelist_title: str = str(custom_reader.get_tag(feed, "whitelist_title", "")).strip()
|
||||||
whitelist_summary: str = str(custom_reader.get_tag(feed, "whitelist_summary", ""))
|
whitelist_summary: str = str(custom_reader.get_tag(feed, "whitelist_summary", "")).strip()
|
||||||
whitelist_content: str = str(custom_reader.get_tag(feed, "whitelist_content", ""))
|
whitelist_content: str = str(custom_reader.get_tag(feed, "whitelist_content", "")).strip()
|
||||||
|
whitelist_author: str = str(custom_reader.get_tag(feed, "whitelist_author", "")).strip()
|
||||||
|
|
||||||
return bool(whitelist_title or whitelist_summary or whitelist_content)
|
regex_whitelist_title: str = str(custom_reader.get_tag(feed, "regex_whitelist_title", "")).strip()
|
||||||
|
regex_whitelist_summary: str = str(custom_reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
|
||||||
|
regex_whitelist_content: str = str(custom_reader.get_tag(feed, "regex_whitelist_content", "")).strip()
|
||||||
|
regex_whitelist_author: str = str(custom_reader.get_tag(feed, "regex_whitelist_author", "")).strip()
|
||||||
|
|
||||||
|
return bool(
|
||||||
|
whitelist_title
|
||||||
|
or whitelist_author
|
||||||
|
or whitelist_content
|
||||||
|
or whitelist_summary
|
||||||
|
or regex_whitelist_author
|
||||||
|
or regex_whitelist_content
|
||||||
|
or regex_whitelist_summary
|
||||||
|
or regex_whitelist_title,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def should_be_sent(custom_reader: Reader, entry: Entry) -> bool:
|
def should_be_sent(custom_reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
|
||||||
"""Return True if the entry is in the whitelist.
|
"""Return True if the entry is in the whitelist.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -41,20 +61,43 @@ def should_be_sent(custom_reader: Reader, entry: Entry) -> bool:
|
|||||||
bool: If the entry is in the whitelist.
|
bool: If the entry is in the whitelist.
|
||||||
"""
|
"""
|
||||||
feed: Feed = entry.feed
|
feed: Feed = entry.feed
|
||||||
whitelist_title: str = str(custom_reader.get_tag(feed, "whitelist_title", ""))
|
# Regular whitelist tags
|
||||||
whitelist_summary: str = str(custom_reader.get_tag(feed, "whitelist_summary", ""))
|
whitelist_title: str = str(custom_reader.get_tag(feed, "whitelist_title", "")).strip()
|
||||||
whitelist_content: str = str(custom_reader.get_tag(feed, "whitelist_content", ""))
|
whitelist_summary: str = str(custom_reader.get_tag(feed, "whitelist_summary", "")).strip()
|
||||||
whitelist_author: str = str(custom_reader.get_tag(feed, "whitelist_author", ""))
|
whitelist_content: str = str(custom_reader.get_tag(feed, "whitelist_content", "")).strip()
|
||||||
|
whitelist_author: str = str(custom_reader.get_tag(feed, "whitelist_author", "")).strip()
|
||||||
|
|
||||||
|
# Regex whitelist tags
|
||||||
|
regex_whitelist_title: str = str(custom_reader.get_tag(feed, "regex_whitelist_title", "")).strip()
|
||||||
|
regex_whitelist_summary: str = str(custom_reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
|
||||||
|
regex_whitelist_content: str = str(custom_reader.get_tag(feed, "regex_whitelist_content", "")).strip()
|
||||||
|
regex_whitelist_author: str = str(custom_reader.get_tag(feed, "regex_whitelist_author", "")).strip()
|
||||||
|
|
||||||
|
# Check regular whitelist
|
||||||
if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title):
|
if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title):
|
||||||
return True
|
return True
|
||||||
if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary):
|
if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary):
|
||||||
return True
|
return True
|
||||||
if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author):
|
if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author):
|
||||||
return True
|
return True
|
||||||
return bool(
|
if (
|
||||||
entry.content
|
entry.content
|
||||||
and entry.content[0].value
|
and entry.content[0].value
|
||||||
and whitelist_content
|
and whitelist_content
|
||||||
and is_word_in_text(whitelist_content, entry.content[0].value),
|
and is_word_in_text(whitelist_content, entry.content[0].value)
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Check regex whitelist
|
||||||
|
if entry.title and regex_whitelist_title and is_regex_match(regex_whitelist_title, entry.title):
|
||||||
|
return True
|
||||||
|
if entry.summary and regex_whitelist_summary and is_regex_match(regex_whitelist_summary, entry.summary):
|
||||||
|
return True
|
||||||
|
if entry.author and regex_whitelist_author and is_regex_match(regex_whitelist_author, entry.author):
|
||||||
|
return True
|
||||||
|
return bool(
|
||||||
|
entry.content
|
||||||
|
and entry.content[0].value
|
||||||
|
and regex_whitelist_content
|
||||||
|
and is_regex_match(regex_whitelist_content, entry.content[0].value),
|
||||||
)
|
)
|
||||||
|
@ -43,7 +43,7 @@ from discord_rss_bot.search import create_html_for_search_results
|
|||||||
from discord_rss_bot.settings import get_reader
|
from discord_rss_bot.settings import get_reader
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from collections.abc import Iterable
|
from collections.abc import AsyncGenerator, Iterable
|
||||||
|
|
||||||
from reader.types import JSONType
|
from reader.types import JSONType
|
||||||
|
|
||||||
@ -88,8 +88,15 @@ reader: Reader = get_reader()
|
|||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI) -> typing.AsyncGenerator[None]:
|
async def lifespan(app: FastAPI) -> AsyncGenerator[None]:
|
||||||
"""This is needed for the ASGI server to run."""
|
"""Lifespan for the FastAPI app.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app: The FastAPI app.
|
||||||
|
|
||||||
|
Yields:
|
||||||
|
None: Nothing.
|
||||||
|
"""
|
||||||
add_missing_tags(reader)
|
add_missing_tags(reader)
|
||||||
scheduler: AsyncIOScheduler = AsyncIOScheduler()
|
scheduler: AsyncIOScheduler = AsyncIOScheduler()
|
||||||
|
|
||||||
@ -250,6 +257,10 @@ async def post_set_whitelist(
|
|||||||
whitelist_summary: Annotated[str, Form()] = "",
|
whitelist_summary: Annotated[str, Form()] = "",
|
||||||
whitelist_content: Annotated[str, Form()] = "",
|
whitelist_content: Annotated[str, Form()] = "",
|
||||||
whitelist_author: Annotated[str, Form()] = "",
|
whitelist_author: Annotated[str, Form()] = "",
|
||||||
|
regex_whitelist_title: Annotated[str, Form()] = "",
|
||||||
|
regex_whitelist_summary: Annotated[str, Form()] = "",
|
||||||
|
regex_whitelist_content: Annotated[str, Form()] = "",
|
||||||
|
regex_whitelist_author: Annotated[str, Form()] = "",
|
||||||
feed_url: Annotated[str, Form()] = "",
|
feed_url: Annotated[str, Form()] = "",
|
||||||
) -> RedirectResponse:
|
) -> RedirectResponse:
|
||||||
"""Set what the whitelist should be sent, if you have this set only words in the whitelist will be sent.
|
"""Set what the whitelist should be sent, if you have this set only words in the whitelist will be sent.
|
||||||
@ -259,6 +270,10 @@ async def post_set_whitelist(
|
|||||||
whitelist_summary: Whitelisted words for when checking the summary.
|
whitelist_summary: Whitelisted words for when checking the summary.
|
||||||
whitelist_content: Whitelisted words for when checking the content.
|
whitelist_content: Whitelisted words for when checking the content.
|
||||||
whitelist_author: Whitelisted words for when checking the author.
|
whitelist_author: Whitelisted words for when checking the author.
|
||||||
|
regex_whitelist_title: Whitelisted regex for when checking the title.
|
||||||
|
regex_whitelist_summary: Whitelisted regex for when checking the summary.
|
||||||
|
regex_whitelist_content: Whitelisted regex for when checking the content.
|
||||||
|
regex_whitelist_author: Whitelisted regex for when checking the author.
|
||||||
feed_url: The feed we should set the whitelist for.
|
feed_url: The feed we should set the whitelist for.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -269,6 +284,10 @@ async def post_set_whitelist(
|
|||||||
reader.set_tag(clean_feed_url, "whitelist_summary", whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
reader.set_tag(clean_feed_url, "whitelist_summary", whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
reader.set_tag(clean_feed_url, "whitelist_content", whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
|
reader.set_tag(clean_feed_url, "whitelist_content", whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
reader.set_tag(clean_feed_url, "whitelist_author", whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
|
reader.set_tag(clean_feed_url, "whitelist_author", whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
reader.set_tag(clean_feed_url, "regex_whitelist_title", regex_whitelist_title) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
reader.set_tag(clean_feed_url, "regex_whitelist_summary", regex_whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
reader.set_tag(clean_feed_url, "regex_whitelist_content", regex_whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
reader.set_tag(clean_feed_url, "regex_whitelist_author", regex_whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
|
||||||
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
|
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
|
||||||
|
|
||||||
@ -287,11 +306,14 @@ async def get_whitelist(feed_url: str, request: Request):
|
|||||||
clean_feed_url: str = feed_url.strip()
|
clean_feed_url: str = feed_url.strip()
|
||||||
feed: Feed = reader.get_feed(urllib.parse.unquote(clean_feed_url))
|
feed: Feed = reader.get_feed(urllib.parse.unquote(clean_feed_url))
|
||||||
|
|
||||||
# Get previous data, this is used when creating the form.
|
|
||||||
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", ""))
|
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", ""))
|
||||||
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", ""))
|
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", ""))
|
||||||
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", ""))
|
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", ""))
|
||||||
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", ""))
|
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", ""))
|
||||||
|
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", ""))
|
||||||
|
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", ""))
|
||||||
|
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", ""))
|
||||||
|
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", ""))
|
||||||
|
|
||||||
context = {
|
context = {
|
||||||
"request": request,
|
"request": request,
|
||||||
@ -300,6 +322,10 @@ async def get_whitelist(feed_url: str, request: Request):
|
|||||||
"whitelist_summary": whitelist_summary,
|
"whitelist_summary": whitelist_summary,
|
||||||
"whitelist_content": whitelist_content,
|
"whitelist_content": whitelist_content,
|
||||||
"whitelist_author": whitelist_author,
|
"whitelist_author": whitelist_author,
|
||||||
|
"regex_whitelist_title": regex_whitelist_title,
|
||||||
|
"regex_whitelist_summary": regex_whitelist_summary,
|
||||||
|
"regex_whitelist_content": regex_whitelist_content,
|
||||||
|
"regex_whitelist_author": regex_whitelist_author,
|
||||||
}
|
}
|
||||||
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
|
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
|
||||||
|
|
||||||
@ -310,6 +336,10 @@ async def post_set_blacklist(
|
|||||||
blacklist_summary: Annotated[str, Form()] = "",
|
blacklist_summary: Annotated[str, Form()] = "",
|
||||||
blacklist_content: Annotated[str, Form()] = "",
|
blacklist_content: Annotated[str, Form()] = "",
|
||||||
blacklist_author: Annotated[str, Form()] = "",
|
blacklist_author: Annotated[str, Form()] = "",
|
||||||
|
regex_blacklist_title: Annotated[str, Form()] = "",
|
||||||
|
regex_blacklist_summary: Annotated[str, Form()] = "",
|
||||||
|
regex_blacklist_content: Annotated[str, Form()] = "",
|
||||||
|
regex_blacklist_author: Annotated[str, Form()] = "",
|
||||||
feed_url: Annotated[str, Form()] = "",
|
feed_url: Annotated[str, Form()] = "",
|
||||||
) -> RedirectResponse:
|
) -> RedirectResponse:
|
||||||
"""Set the blacklist.
|
"""Set the blacklist.
|
||||||
@ -322,6 +352,10 @@ async def post_set_blacklist(
|
|||||||
blacklist_summary: Blacklisted words for when checking the summary.
|
blacklist_summary: Blacklisted words for when checking the summary.
|
||||||
blacklist_content: Blacklisted words for when checking the content.
|
blacklist_content: Blacklisted words for when checking the content.
|
||||||
blacklist_author: Blacklisted words for when checking the author.
|
blacklist_author: Blacklisted words for when checking the author.
|
||||||
|
regex_blacklist_title: Blacklisted regex for when checking the title.
|
||||||
|
regex_blacklist_summary: Blacklisted regex for when checking the summary.
|
||||||
|
regex_blacklist_content: Blacklisted regex for when checking the content.
|
||||||
|
regex_blacklist_author: Blacklisted regex for when checking the author.
|
||||||
feed_url: What feed we should set the blacklist for.
|
feed_url: What feed we should set the blacklist for.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -332,7 +366,10 @@ async def post_set_blacklist(
|
|||||||
reader.set_tag(clean_feed_url, "blacklist_summary", blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
reader.set_tag(clean_feed_url, "blacklist_summary", blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
reader.set_tag(clean_feed_url, "blacklist_content", blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
|
reader.set_tag(clean_feed_url, "blacklist_content", blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
reader.set_tag(clean_feed_url, "blacklist_author", blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
|
reader.set_tag(clean_feed_url, "blacklist_author", blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
reader.set_tag(clean_feed_url, "regex_blacklist_title", regex_blacklist_title) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
reader.set_tag(clean_feed_url, "regex_blacklist_summary", regex_blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
reader.set_tag(clean_feed_url, "regex_blacklist_content", regex_blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
reader.set_tag(clean_feed_url, "regex_blacklist_author", regex_blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
|
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
@ -349,11 +386,14 @@ async def get_blacklist(feed_url: str, request: Request):
|
|||||||
"""
|
"""
|
||||||
feed: Feed = reader.get_feed(urllib.parse.unquote(feed_url))
|
feed: Feed = reader.get_feed(urllib.parse.unquote(feed_url))
|
||||||
|
|
||||||
# Get previous data, this is used when creating the form.
|
|
||||||
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", ""))
|
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", ""))
|
||||||
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", ""))
|
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", ""))
|
||||||
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", ""))
|
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", ""))
|
||||||
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", ""))
|
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", ""))
|
||||||
|
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", ""))
|
||||||
|
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", ""))
|
||||||
|
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", ""))
|
||||||
|
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", ""))
|
||||||
|
|
||||||
context = {
|
context = {
|
||||||
"request": request,
|
"request": request,
|
||||||
@ -362,6 +402,10 @@ async def get_blacklist(feed_url: str, request: Request):
|
|||||||
"blacklist_summary": blacklist_summary,
|
"blacklist_summary": blacklist_summary,
|
||||||
"blacklist_content": blacklist_content,
|
"blacklist_content": blacklist_content,
|
||||||
"blacklist_author": blacklist_author,
|
"blacklist_author": blacklist_author,
|
||||||
|
"regex_blacklist_title": regex_blacklist_title,
|
||||||
|
"regex_blacklist_summary": regex_blacklist_summary,
|
||||||
|
"regex_blacklist_content": regex_blacklist_content,
|
||||||
|
"regex_blacklist_author": regex_blacklist_author,
|
||||||
}
|
}
|
||||||
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
|
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
|
||||||
|
|
||||||
@ -461,7 +505,7 @@ async def get_embed_page(feed_url: str, request: Request):
|
|||||||
|
|
||||||
|
|
||||||
@app.post("/embed", response_class=HTMLResponse)
|
@app.post("/embed", response_class=HTMLResponse)
|
||||||
async def post_embed( # noqa: PLR0913, PLR0917
|
async def post_embed(
|
||||||
feed_url: Annotated[str, Form()],
|
feed_url: Annotated[str, Form()],
|
||||||
title: Annotated[str, Form()] = "",
|
title: Annotated[str, Form()] = "",
|
||||||
description: Annotated[str, Form()] = "",
|
description: Annotated[str, Form()] = "",
|
||||||
|
@ -42,6 +42,49 @@
|
|||||||
<label for="blacklist_author" class="col-sm-6 col-form-label">Blacklist - Author</label>
|
<label for="blacklist_author" class="col-sm-6 col-form-label">Blacklist - Author</label>
|
||||||
<input name="blacklist_author" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="blacklist_author" type="text" class="form-control bg-dark border-dark text-muted"
|
||||||
id="blacklist_author" value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
|
id="blacklist_author" value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
|
||||||
|
|
||||||
|
<div class="mt-4">
|
||||||
|
<div class="form-text">
|
||||||
|
<ul class="list-inline">
|
||||||
|
<li>
|
||||||
|
Regular expression patterns for advanced filtering. Each pattern should be on a new
|
||||||
|
line.
|
||||||
|
</li>
|
||||||
|
<li>Patterns are case-insensitive.</li>
|
||||||
|
<li>
|
||||||
|
Examples:
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
^New Release:.*
|
||||||
|
\b(update|version|patch)\s+\d+\.\d+
|
||||||
|
.*\[(important|notice)\].*
|
||||||
|
</pre>
|
||||||
|
</code>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<label for="regex_blacklist_title" class="col-sm-6 col-form-label">Regex Blacklist - Title</label>
|
||||||
|
<textarea name="regex_blacklist_title" class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="regex_blacklist_title"
|
||||||
|
rows="3">{%- if regex_blacklist_title -%}{{ regex_blacklist_title }}{%- endif -%}</textarea>
|
||||||
|
|
||||||
|
<label for="regex_blacklist_summary" class="col-sm-6 col-form-label">Regex Blacklist -
|
||||||
|
Summary</label>
|
||||||
|
<textarea name="regex_blacklist_summary" class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="regex_blacklist_summary"
|
||||||
|
rows="3">{%- if regex_blacklist_summary -%}{{ regex_blacklist_summary }}{%- endif -%}</textarea>
|
||||||
|
|
||||||
|
<label for="regex_blacklist_content" class="col-sm-6 col-form-label">Regex Blacklist -
|
||||||
|
Content</label>
|
||||||
|
<textarea name="regex_blacklist_content" class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="regex_blacklist_content"
|
||||||
|
rows="3">{%- if regex_blacklist_content -%}{{ regex_blacklist_content }}{%- endif -%}</textarea>
|
||||||
|
|
||||||
|
<label for="regex_blacklist_author" class="col-sm-6 col-form-label">Regex Blacklist - Author</label>
|
||||||
|
<textarea name="regex_blacklist_author" class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="regex_blacklist_author"
|
||||||
|
rows="3">{%- if regex_blacklist_author -%}{{ regex_blacklist_author }}{%- endif -%}</textarea>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<!-- Add a hidden feed_url field to the form -->
|
<!-- Add a hidden feed_url field to the form -->
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{% extends "base.html" %}
|
{% extends "base.html" %}
|
||||||
{% block title %}
|
{% block title %}
|
||||||
| Blacklist
|
| Whitelist
|
||||||
{% endblock title %}
|
{% endblock title %}
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="p-2 border border-dark">
|
<div class="p-2 border border-dark">
|
||||||
@ -42,6 +42,49 @@
|
|||||||
<label for="whitelist_author" class="col-sm-6 col-form-label">Whitelist - Author</label>
|
<label for="whitelist_author" class="col-sm-6 col-form-label">Whitelist - Author</label>
|
||||||
<input name="whitelist_author" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="whitelist_author" type="text" class="form-control bg-dark border-dark text-muted"
|
||||||
id="whitelist_author" value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
|
id="whitelist_author" value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
|
||||||
|
|
||||||
|
<div class="mt-4">
|
||||||
|
<div class="form-text">
|
||||||
|
<ul class="list-inline">
|
||||||
|
<li>
|
||||||
|
Regular expression patterns for advanced filtering. Each pattern should be on a new
|
||||||
|
line. Commas also separate patterns.
|
||||||
|
</li>
|
||||||
|
<li>Patterns are case-insensitive.</li>
|
||||||
|
<li>
|
||||||
|
Examples:
|
||||||
|
<code>
|
||||||
|
<pre>
|
||||||
|
^New Release:.*
|
||||||
|
\b(update|version|patch)\s+\d+\.\d+
|
||||||
|
.*\[(important|notice)\].*
|
||||||
|
</pre>
|
||||||
|
</code>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<label for="regex_whitelist_title" class="col-sm-6 col-form-label">Regex Whitelist - Title</label>
|
||||||
|
<textarea name="regex_whitelist_title" class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="regex_whitelist_title"
|
||||||
|
rows="3">{%- if regex_whitelist_title -%}{{ regex_whitelist_title }}{%- endif -%}</textarea>
|
||||||
|
|
||||||
|
<label for="regex_whitelist_summary" class="col-sm-6 col-form-label">Regex Whitelist -
|
||||||
|
Summary</label>
|
||||||
|
<textarea name="regex_whitelist_summary" class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="regex_whitelist_summary"
|
||||||
|
rows="3">{%- if regex_whitelist_summary -%}{{ regex_whitelist_summary }}{%- endif -%}</textarea>
|
||||||
|
|
||||||
|
<label for="regex_whitelist_content" class="col-sm-6 col-form-label">Regex Whitelist -
|
||||||
|
Content</label>
|
||||||
|
<textarea name="regex_whitelist_content" class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="regex_whitelist_content"
|
||||||
|
rows="3">{%- if regex_whitelist_content -%}{{ regex_whitelist_content }}{%- endif -%}</textarea>
|
||||||
|
|
||||||
|
<label for="regex_whitelist_author" class="col-sm-6 col-form-label">Regex Whitelist - Author</label>
|
||||||
|
<textarea name="regex_whitelist_author" class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="regex_whitelist_author"
|
||||||
|
rows="3">{%- if regex_whitelist_author -%}{{ regex_whitelist_author }}{%- endif -%}</textarea>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<!-- Add a hidden feed_url field to the form -->
|
<!-- Add a hidden feed_url field to the form -->
|
||||||
|
@ -42,7 +42,7 @@ platformdirs = "*"
|
|||||||
python-dotenv = "*"
|
python-dotenv = "*"
|
||||||
python-multipart = "*"
|
python-multipart = "*"
|
||||||
reader = "*"
|
reader = "*"
|
||||||
sentry-sdk = {version = "*", extras = ["fastapi"]}
|
sentry-sdk = { version = "*", extras = ["fastapi"] }
|
||||||
uvicorn = "*"
|
uvicorn = "*"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
@ -86,6 +86,8 @@ lint.ignore = [
|
|||||||
"PLR6301", # Checks for the presence of unused self parameter in methods definitions.
|
"PLR6301", # Checks for the presence of unused self parameter in methods definitions.
|
||||||
"RUF029", # Checks for functions declared async that do not await or otherwise use features requiring the function to be declared async.
|
"RUF029", # Checks for functions declared async that do not await or otherwise use features requiring the function to be declared async.
|
||||||
"TD003", # Checks that a TODO comment is associated with a link to a relevant issue or ticket.
|
"TD003", # Checks that a TODO comment is associated with a link to a relevant issue or ticket.
|
||||||
|
"PLR0913", # Checks for function definitions that include too many arguments.
|
||||||
|
"PLR0917", # Checks for function definitions that include too many positional arguments.
|
||||||
|
|
||||||
# Conflicting lint rules when using Ruff's formatter
|
# Conflicting lint rules when using Ruff's formatter
|
||||||
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
|
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
|
||||||
|
@ -39,6 +39,13 @@ def test_has_black_tags() -> None:
|
|||||||
check_if_has_tag(reader, feed, "blacklist_title")
|
check_if_has_tag(reader, feed, "blacklist_title")
|
||||||
check_if_has_tag(reader, feed, "blacklist_summary")
|
check_if_has_tag(reader, feed, "blacklist_summary")
|
||||||
check_if_has_tag(reader, feed, "blacklist_content")
|
check_if_has_tag(reader, feed, "blacklist_content")
|
||||||
|
check_if_has_tag(reader, feed, "blacklist_author")
|
||||||
|
|
||||||
|
# Test regex blacklist tags
|
||||||
|
check_if_has_tag(reader, feed, "regex_blacklist_title")
|
||||||
|
check_if_has_tag(reader, feed, "regex_blacklist_summary")
|
||||||
|
check_if_has_tag(reader, feed, "regex_blacklist_content")
|
||||||
|
check_if_has_tag(reader, feed, "regex_blacklist_author")
|
||||||
|
|
||||||
# Clean up
|
# Clean up
|
||||||
reader.delete_feed(feed_url)
|
reader.delete_feed(feed_url)
|
||||||
@ -74,6 +81,7 @@ def test_should_be_skipped() -> None:
|
|||||||
# Test entry without any blacklists
|
# Test entry without any blacklists
|
||||||
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
# Test standard blacklist functionality
|
||||||
reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd") # pyright: ignore[reportArgumentType]
|
reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd") # pyright: ignore[reportArgumentType]
|
||||||
assert entry_should_be_skipped(reader, first_entry[0]) is True, f"Entry should be skipped: {first_entry[0]}"
|
assert entry_should_be_skipped(reader, first_entry[0]) is True, f"Entry should be skipped: {first_entry[0]}"
|
||||||
reader.delete_tag(feed, "blacklist_title")
|
reader.delete_tag(feed, "blacklist_title")
|
||||||
@ -113,3 +121,81 @@ def test_should_be_skipped() -> None:
|
|||||||
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
reader.delete_tag(feed, "blacklist_author")
|
reader.delete_tag(feed, "blacklist_author")
|
||||||
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_regex_should_be_skipped() -> None:
|
||||||
|
"""Test the regex filtering functionality for blacklist."""
|
||||||
|
reader: Reader = get_reader()
|
||||||
|
|
||||||
|
# Add feed and update entries
|
||||||
|
reader.add_feed(feed_url)
|
||||||
|
feed: Feed = reader.get_feed(feed_url)
|
||||||
|
reader.update_feeds()
|
||||||
|
|
||||||
|
# Get first entry
|
||||||
|
first_entry: list[Entry] = []
|
||||||
|
entries: Iterable[Entry] = reader.get_entries(feed=feed)
|
||||||
|
assert entries is not None, f"Entries should not be None: {entries}"
|
||||||
|
for entry in entries:
|
||||||
|
first_entry.append(entry)
|
||||||
|
break
|
||||||
|
assert len(first_entry) == 1, f"First entry should be added: {first_entry}"
|
||||||
|
|
||||||
|
# Test entry without any regex blacklists
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
# Test regex blacklist for title
|
||||||
|
reader.set_tag(feed, "regex_blacklist_title", r"fvnnn\w+") # pyright: ignore[reportArgumentType]
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is True, (
|
||||||
|
f"Entry should be skipped with regex title match: {first_entry[0]}"
|
||||||
|
)
|
||||||
|
reader.delete_tag(feed, "regex_blacklist_title")
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
# Test regex blacklist for summary
|
||||||
|
reader.set_tag(feed, "regex_blacklist_summary", r"ffdnfdn\w+") # pyright: ignore[reportArgumentType]
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is True, (
|
||||||
|
f"Entry should be skipped with regex summary match: {first_entry[0]}"
|
||||||
|
)
|
||||||
|
reader.delete_tag(feed, "regex_blacklist_summary")
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
# Test regex blacklist for content
|
||||||
|
reader.set_tag(feed, "regex_blacklist_content", r"ffdnfdnfdn\w+") # pyright: ignore[reportArgumentType]
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is True, (
|
||||||
|
f"Entry should be skipped with regex content match: {first_entry[0]}"
|
||||||
|
)
|
||||||
|
reader.delete_tag(feed, "regex_blacklist_content")
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
# Test regex blacklist for author
|
||||||
|
reader.set_tag(feed, "regex_blacklist_author", r"TheLovinator\d*") # pyright: ignore[reportArgumentType]
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is True, (
|
||||||
|
f"Entry should be skipped with regex author match: {first_entry[0]}"
|
||||||
|
)
|
||||||
|
reader.delete_tag(feed, "regex_blacklist_author")
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
# Test invalid regex pattern (should not raise an exception)
|
||||||
|
reader.set_tag(feed, "regex_blacklist_title", r"[incomplete") # pyright: ignore[reportArgumentType]
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, (
|
||||||
|
f"Entry should not be skipped with invalid regex: {first_entry[0]}"
|
||||||
|
)
|
||||||
|
reader.delete_tag(feed, "regex_blacklist_title")
|
||||||
|
|
||||||
|
# Test multiple regex patterns separated by commas
|
||||||
|
reader.set_tag(feed, "regex_blacklist_author", r"pattern1,TheLovinator\d*,pattern3") # pyright: ignore[reportArgumentType]
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is True, (
|
||||||
|
f"Entry should be skipped with one matching pattern in list: {first_entry[0]}"
|
||||||
|
)
|
||||||
|
reader.delete_tag(feed, "regex_blacklist_author")
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
# Test newline-separated regex patterns
|
||||||
|
newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
|
||||||
|
reader.set_tag(feed, "regex_blacklist_author", newline_patterns) # pyright: ignore[reportArgumentType]
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is True, (
|
||||||
|
f"Entry should be skipped with newline-separated patterns: {first_entry[0]}"
|
||||||
|
)
|
||||||
|
reader.delete_tag(feed, "regex_blacklist_author")
|
||||||
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from discord_rss_bot.filter.utils import is_word_in_text
|
from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
|
||||||
|
|
||||||
|
|
||||||
def test_is_word_in_text() -> None:
|
def test_is_word_in_text() -> None:
|
||||||
@ -14,3 +14,51 @@ def test_is_word_in_text() -> None:
|
|||||||
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
|
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
|
||||||
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
|
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
|
||||||
assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false
|
assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_regex_match() -> None:
|
||||||
|
msg_true = "Should return True"
|
||||||
|
msg_false = "Should return False"
|
||||||
|
|
||||||
|
# Test basic regex patterns
|
||||||
|
assert is_regex_match(r"word\d+", "This text contains word123") is True, msg_true
|
||||||
|
assert is_regex_match(r"^Hello", "Hello world") is True, msg_true
|
||||||
|
assert is_regex_match(r"world$", "Hello world") is True, msg_true
|
||||||
|
|
||||||
|
# Test case insensitivity
|
||||||
|
assert is_regex_match(r"hello", "This text contains HELLO") is True, msg_true
|
||||||
|
|
||||||
|
# Test comma-separated patterns
|
||||||
|
assert is_regex_match(r"pattern1,pattern2", "This contains pattern2") is True, msg_true
|
||||||
|
assert is_regex_match(r"pattern1, pattern2", "This contains pattern1") is True, msg_true
|
||||||
|
|
||||||
|
# Test regex that shouldn't match
|
||||||
|
assert is_regex_match(r"^start", "This doesn't start with the pattern") is False, msg_false
|
||||||
|
assert is_regex_match(r"end$", "This doesn't end with the pattern") is False, msg_false
|
||||||
|
|
||||||
|
# Test with empty input
|
||||||
|
assert is_regex_match("", "Some text") is False, msg_false
|
||||||
|
assert is_regex_match("pattern", "") is False, msg_false
|
||||||
|
|
||||||
|
# Test with invalid regex (should return False instead of raising an exception)
|
||||||
|
assert is_regex_match(r"[incomplete", "Some text") is False, msg_false
|
||||||
|
|
||||||
|
# Test with multiple patterns where one is invalid
|
||||||
|
assert is_regex_match(r"valid, [invalid, \w+", "Contains word") is True, msg_true
|
||||||
|
|
||||||
|
# Test newline-separated patterns
|
||||||
|
newline_patterns = "pattern1\n^start\ncontains\\d+"
|
||||||
|
assert is_regex_match(newline_patterns, "This contains123 text") is True, msg_true
|
||||||
|
assert is_regex_match(newline_patterns, "start of line") is True, msg_true
|
||||||
|
assert is_regex_match(newline_patterns, "pattern1 is here") is True, msg_true
|
||||||
|
assert is_regex_match(newline_patterns, "None of these match") is False, msg_false
|
||||||
|
|
||||||
|
# Test mixed newline and comma patterns (for backward compatibility)
|
||||||
|
mixed_patterns = "pattern1\npattern2,pattern3\npattern4"
|
||||||
|
assert is_regex_match(mixed_patterns, "Contains pattern3") is True, msg_true
|
||||||
|
assert is_regex_match(mixed_patterns, "Contains pattern4") is True, msg_true
|
||||||
|
|
||||||
|
# Test with empty lines and spaces
|
||||||
|
whitespace_patterns = "\\s+\n \n\npattern\n\n"
|
||||||
|
assert is_regex_match(whitespace_patterns, "text with spaces") is True, msg_true
|
||||||
|
assert is_regex_match(whitespace_patterns, "text with pattern") is True, msg_true
|
||||||
|
@ -38,6 +38,13 @@ def test_has_white_tags() -> None:
|
|||||||
check_if_has_tag(reader, feed, "whitelist_title")
|
check_if_has_tag(reader, feed, "whitelist_title")
|
||||||
check_if_has_tag(reader, feed, "whitelist_summary")
|
check_if_has_tag(reader, feed, "whitelist_summary")
|
||||||
check_if_has_tag(reader, feed, "whitelist_content")
|
check_if_has_tag(reader, feed, "whitelist_content")
|
||||||
|
check_if_has_tag(reader, feed, "whitelist_author")
|
||||||
|
|
||||||
|
# Test regex whitelist tags
|
||||||
|
check_if_has_tag(reader, feed, "regex_whitelist_title")
|
||||||
|
check_if_has_tag(reader, feed, "regex_whitelist_summary")
|
||||||
|
check_if_has_tag(reader, feed, "regex_whitelist_content")
|
||||||
|
check_if_has_tag(reader, feed, "regex_whitelist_author")
|
||||||
|
|
||||||
# Clean up
|
# Clean up
|
||||||
reader.delete_feed(feed_url)
|
reader.delete_feed(feed_url)
|
||||||
@ -109,3 +116,67 @@ def test_should_be_sent() -> None:
|
|||||||
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
reader.delete_tag(feed, "whitelist_author")
|
reader.delete_tag(feed, "whitelist_author")
|
||||||
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
|
||||||
|
|
||||||
|
def test_regex_should_be_sent() -> None:
|
||||||
|
"""Test the regex filtering functionality for whitelist."""
|
||||||
|
reader: Reader = get_reader()
|
||||||
|
|
||||||
|
# Add feed and update entries
|
||||||
|
reader.add_feed(feed_url)
|
||||||
|
feed: Feed = reader.get_feed(feed_url)
|
||||||
|
reader.update_feeds()
|
||||||
|
|
||||||
|
# Get first entry
|
||||||
|
first_entry: list[Entry] = []
|
||||||
|
entries: Iterable[Entry] = reader.get_entries(feed=feed)
|
||||||
|
assert entries is not None, "Entries should not be None"
|
||||||
|
for entry in entries:
|
||||||
|
first_entry.append(entry)
|
||||||
|
break
|
||||||
|
assert len(first_entry) == 1, "First entry should be added"
|
||||||
|
|
||||||
|
# Test entry without any regex whitelists
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
|
||||||
|
# Test regex whitelist for title
|
||||||
|
reader.set_tag(feed, "regex_whitelist_title", r"fvnnn\w+") # pyright: ignore[reportArgumentType]
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex title match"
|
||||||
|
reader.delete_tag(feed, "regex_whitelist_title")
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
|
||||||
|
# Test regex whitelist for summary
|
||||||
|
reader.set_tag(feed, "regex_whitelist_summary", r"ffdnfdn\w+") # pyright: ignore[reportArgumentType]
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex summary match"
|
||||||
|
reader.delete_tag(feed, "regex_whitelist_summary")
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
|
||||||
|
# Test regex whitelist for content
|
||||||
|
reader.set_tag(feed, "regex_whitelist_content", r"ffdnfdnfdn\w+") # pyright: ignore[reportArgumentType]
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex content match"
|
||||||
|
reader.delete_tag(feed, "regex_whitelist_content")
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
|
||||||
|
# Test regex whitelist for author
|
||||||
|
reader.set_tag(feed, "regex_whitelist_author", r"TheLovinator\d*") # pyright: ignore[reportArgumentType]
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex author match"
|
||||||
|
reader.delete_tag(feed, "regex_whitelist_author")
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
|
||||||
|
# Test invalid regex pattern (should not raise an exception)
|
||||||
|
reader.set_tag(feed, "regex_whitelist_title", r"[incomplete") # pyright: ignore[reportArgumentType]
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent with invalid regex"
|
||||||
|
reader.delete_tag(feed, "regex_whitelist_title")
|
||||||
|
|
||||||
|
# Test multiple regex patterns separated by commas
|
||||||
|
reader.set_tag(feed, "regex_whitelist_author", r"pattern1,TheLovinator\d*,pattern3") # pyright: ignore[reportArgumentType]
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with one matching pattern in list"
|
||||||
|
reader.delete_tag(feed, "regex_whitelist_author")
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
|
||||||
|
# Test newline-separated regex patterns
|
||||||
|
newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
|
||||||
|
reader.set_tag(feed, "regex_whitelist_author", newline_patterns) # pyright: ignore[reportArgumentType]
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns"
|
||||||
|
reader.delete_tag(feed, "regex_whitelist_author")
|
||||||
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
Reference in New Issue
Block a user