From ac63041b28d1ce87685523a7957c3b3360c8229c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?= <tlovinator@gmail.com>
Date: Thu, 3 Apr 2025 05:44:50 +0200
Subject: [PATCH] =?UTF-8?q?Add=20regex=20support=20to=20blacklist=20and=20?=
 =?UTF-8?q?whitelist=20filters.=20Strong=20code,=20many=20bananas!=20?=
 =?UTF-8?q?=F0=9F=A6=8D=F0=9F=A6=8D=F0=9F=A6=8D=F0=9F=A6=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .pre-commit-config.yaml                  |  2 +-
 discord_rss_bot/filter/blacklist.py      | 85 +++++++++++++++++++----
 discord_rss_bot/filter/utils.py          | 50 ++++++++++++++
 discord_rss_bot/filter/whitelist.py      | 71 +++++++++++++++----
 discord_rss_bot/main.py                  | 58 ++++++++++++++--
 discord_rss_bot/templates/blacklist.html | 43 ++++++++++++
 discord_rss_bot/templates/whitelist.html | 45 ++++++++++++-
 pyproject.toml                           |  4 +-
 tests/test_blacklist.py                  | 86 ++++++++++++++++++++++++
 tests/test_utils.py                      | 50 +++++++++++++-
 tests/test_whitelist.py                  | 71 +++++++++++++++++++
 11 files changed, 526 insertions(+), 39 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a3c42c0..908367d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
 
   # An extremely fast Python linter and formatter.
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.5
+    rev: v0.11.2
     hooks:
       - id: ruff-format
       - id: ruff
diff --git a/discord_rss_bot/filter/blacklist.py b/discord_rss_bot/filter/blacklist.py
index 808d7c9..87b4913 100644
--- a/discord_rss_bot/filter/blacklist.py
+++ b/discord_rss_bot/filter/blacklist.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
-from discord_rss_bot.filter.utils import is_word_in_text
+from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
 
 if TYPE_CHECKING:
     from reader import Entry, Feed, Reader
@@ -12,9 +12,14 @@ def feed_has_blacklist_tags(custom_reader: Reader, feed: Feed) -> bool:
     """Return True if the feed has blacklist tags.
 
     The following tags are checked:
-    - blacklist_title
+    - blacklist_author
+    - blacklist_content
     - blacklist_summary
-    - blacklist_content.
+    - blacklist_title
+    - regex_blacklist_author
+    - regex_blacklist_content
+    - regex_blacklist_summary
+    - regex_blacklist_title
 
     Args:
         custom_reader: The reader.
@@ -23,14 +28,29 @@ def feed_has_blacklist_tags(custom_reader: Reader, feed: Feed) -> bool:
     Returns:
         bool: If the feed has any of the tags.
     """
-    blacklist_title: str = str(custom_reader.get_tag(feed, "blacklist_title", ""))
-    blacklist_summary: str = str(custom_reader.get_tag(feed, "blacklist_summary", ""))
-    blacklist_content: str = str(custom_reader.get_tag(feed, "blacklist_content", ""))
+    blacklist_author: str = str(custom_reader.get_tag(feed, "blacklist_author", "")).strip()
+    blacklist_content: str = str(custom_reader.get_tag(feed, "blacklist_content", "")).strip()
+    blacklist_summary: str = str(custom_reader.get_tag(feed, "blacklist_summary", "")).strip()
+    blacklist_title: str = str(custom_reader.get_tag(feed, "blacklist_title", "")).strip()
 
-    return bool(blacklist_title or blacklist_summary or blacklist_content)
+    regex_blacklist_author: str = str(custom_reader.get_tag(feed, "regex_blacklist_author", "")).strip()
+    regex_blacklist_content: str = str(custom_reader.get_tag(feed, "regex_blacklist_content", "")).strip()
+    regex_blacklist_summary: str = str(custom_reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
+    regex_blacklist_title: str = str(custom_reader.get_tag(feed, "regex_blacklist_title", "")).strip()
+
+    return bool(
+        blacklist_title
+        or blacklist_author
+        or blacklist_content
+        or blacklist_summary
+        or regex_blacklist_author
+        or regex_blacklist_content
+        or regex_blacklist_summary
+        or regex_blacklist_title,
+    )
 
 
-def entry_should_be_skipped(custom_reader: Reader, entry: Entry) -> bool:
+def entry_should_be_skipped(custom_reader: Reader, entry: Entry) -> bool:  # noqa: PLR0911
     """Return True if the entry is in the blacklist.
 
     Args:
@@ -40,21 +60,45 @@ def entry_should_be_skipped(custom_reader: Reader, entry: Entry) -> bool:
     Returns:
         bool: If the entry is in the blacklist.
     """
-    blacklist_title: str = str(custom_reader.get_tag(entry.feed, "blacklist_title", ""))
-    blacklist_summary: str = str(custom_reader.get_tag(entry.feed, "blacklist_summary", ""))
-    blacklist_content: str = str(custom_reader.get_tag(entry.feed, "blacklist_content", ""))
-    blacklist_author: str = str(custom_reader.get_tag(entry.feed, "blacklist_author", ""))
+    feed: Feed = entry.feed
+
+    blacklist_title: str = str(custom_reader.get_tag(feed, "blacklist_title", "")).strip()
+    blacklist_summary: str = str(custom_reader.get_tag(feed, "blacklist_summary", "")).strip()
+    blacklist_content: str = str(custom_reader.get_tag(feed, "blacklist_content", "")).strip()
+    blacklist_author: str = str(custom_reader.get_tag(feed, "blacklist_author", "")).strip()
+
+    regex_blacklist_title: str = str(custom_reader.get_tag(feed, "regex_blacklist_title", "")).strip()
+    regex_blacklist_summary: str = str(custom_reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
+    regex_blacklist_content: str = str(custom_reader.get_tag(feed, "regex_blacklist_content", "")).strip()
+    regex_blacklist_author: str = str(custom_reader.get_tag(feed, "regex_blacklist_author", "")).strip()
     # TODO(TheLovinator): Also add support for entry_text and more.
 
+    # Check regular blacklist
     if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title):
         return True
     if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary):
         return True
     if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author):
         return True
+    # Only the first content item of the entry is inspected.
+    if (
+        entry.content
+        and entry.content[0].value
+        and blacklist_content
+        and is_word_in_text(blacklist_content, entry.content[0].value)
+    ):
+        return True
+
+    # Check regex blacklist
+    if entry.title and regex_blacklist_title and is_regex_match(regex_blacklist_title, entry.title):
+        return True
+    if entry.summary and regex_blacklist_summary and is_regex_match(regex_blacklist_summary, entry.summary):
+        return True
+    if entry.author and regex_blacklist_author and is_regex_match(regex_blacklist_author, entry.author):
+        return True
     return bool(
         entry.content
         and entry.content[0].value
-        and blacklist_content
-        and is_word_in_text(blacklist_content, entry.content[0].value),
+        and regex_blacklist_content
+        and is_regex_match(regex_blacklist_content, entry.content[0].value),
     )
diff --git a/discord_rss_bot/filter/utils.py b/discord_rss_bot/filter/utils.py
index 090518d..ff93e59 100644
--- a/discord_rss_bot/filter/utils.py
+++ b/discord_rss_bot/filter/utils.py
@@ -1,7 +1,10 @@
 from __future__ import annotations
 
+import logging
 import re
 
+logger: logging.Logger = logging.getLogger(__name__)
+
 
 def is_word_in_text(word_string: str, text: str) -> bool:
     """Check if any of the words are in the text.
@@ -20,3 +23,40 @@ def is_word_in_text(word_string: str, text: str) -> bool:
 
     # Check if any pattern matches the text.
     return any(pattern.search(text) for pattern in patterns)
+
+
+def is_regex_match(regex_string: str, text: str) -> bool:
+    """Check if any of the regex patterns match the text.
+
+    Patterns are separated by newlines. Every comma is also treated as a separator,
+    so a pattern that itself contains a comma (for example ``a{1,3}``) is split in two.
+    Patterns are compiled case-insensitively and applied with ``re.search``; patterns
+    that fail to compile are logged and skipped.
+
+    Args:
+        regex_string: A string containing regex patterns, separated by newlines or commas.
+        text: The text to search in.
+
+    Returns:
+        bool: True if any regex pattern matches the text, otherwise False.
+    """
+    if not regex_string or not text:
+        return False
+
+    # Split on newlines and commas, then drop surrounding whitespace and empty parts.
+    regex_list: list[str] = [part.strip() for part in re.split(r"[\n,]", regex_string) if part.strip()]
+
+    for pattern_str in regex_list:
+        # Compile inside the try so one invalid pattern does not stop the others from being tried.
+        try:
+            pattern: re.Pattern[str] = re.compile(pattern_str, re.IGNORECASE)
+        except re.error:
+            logger.warning("Invalid regex pattern: %s", pattern_str)
+            continue
+
+        if pattern.search(text):
+            logger.info("Regex pattern matched: %s", pattern_str)
+            return True
+
+    logger.info("No regex patterns matched.")
+    return False
diff --git a/discord_rss_bot/filter/whitelist.py b/discord_rss_bot/filter/whitelist.py
index a55a514..b4b5c23 100644
--- a/discord_rss_bot/filter/whitelist.py
+++ b/discord_rss_bot/filter/whitelist.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
-from discord_rss_bot.filter.utils import is_word_in_text
+from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
 
 if TYPE_CHECKING:
     from reader import Entry, Feed, Reader
@@ -12,9 +12,14 @@ def has_white_tags(custom_reader: Reader, feed: Feed) -> bool:
     """Return True if the feed has whitelist tags.
 
     The following tags are checked:
-    - whitelist_title
+    - regex_whitelist_author
+    - regex_whitelist_content
+    - regex_whitelist_summary
+    - regex_whitelist_title
+    - whitelist_author
+    - whitelist_content
     - whitelist_summary
-    - whitelist_content.
+    - whitelist_title
 
     Args:
         custom_reader: The reader.
@@ -23,14 +28,29 @@ def has_white_tags(custom_reader: Reader, feed: Feed) -> bool:
     Returns:
         bool: If the feed has any of the tags.
     """
-    whitelist_title: str = str(custom_reader.get_tag(feed, "whitelist_title", ""))
-    whitelist_summary: str = str(custom_reader.get_tag(feed, "whitelist_summary", ""))
-    whitelist_content: str = str(custom_reader.get_tag(feed, "whitelist_content", ""))
+    whitelist_title: str = str(custom_reader.get_tag(feed, "whitelist_title", "")).strip()
+    whitelist_summary: str = str(custom_reader.get_tag(feed, "whitelist_summary", "")).strip()
+    whitelist_content: str = str(custom_reader.get_tag(feed, "whitelist_content", "")).strip()
+    whitelist_author: str = str(custom_reader.get_tag(feed, "whitelist_author", "")).strip()
 
-    return bool(whitelist_title or whitelist_summary or whitelist_content)
+    regex_whitelist_title: str = str(custom_reader.get_tag(feed, "regex_whitelist_title", "")).strip()
+    regex_whitelist_summary: str = str(custom_reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
+    regex_whitelist_content: str = str(custom_reader.get_tag(feed, "regex_whitelist_content", "")).strip()
+    regex_whitelist_author: str = str(custom_reader.get_tag(feed, "regex_whitelist_author", "")).strip()
+
+    return bool(
+        whitelist_title
+        or whitelist_author
+        or whitelist_content
+        or whitelist_summary
+        or regex_whitelist_author
+        or regex_whitelist_content
+        or regex_whitelist_summary
+        or regex_whitelist_title,
+    )
 
 
-def should_be_sent(custom_reader: Reader, entry: Entry) -> bool:
+def should_be_sent(custom_reader: Reader, entry: Entry) -> bool:  # noqa: PLR0911
     """Return True if the entry is in the whitelist.
 
     Args:
@@ -41,20 +61,43 @@ def should_be_sent(custom_reader: Reader, entry: Entry) -> bool:
         bool: If the entry is in the whitelist.
     """
     feed: Feed = entry.feed
-    whitelist_title: str = str(custom_reader.get_tag(feed, "whitelist_title", ""))
-    whitelist_summary: str = str(custom_reader.get_tag(feed, "whitelist_summary", ""))
-    whitelist_content: str = str(custom_reader.get_tag(feed, "whitelist_content", ""))
-    whitelist_author: str = str(custom_reader.get_tag(feed, "whitelist_author", ""))
+    # Regular whitelist tags
+    whitelist_title: str = str(custom_reader.get_tag(feed, "whitelist_title", "")).strip()
+    whitelist_summary: str = str(custom_reader.get_tag(feed, "whitelist_summary", "")).strip()
+    whitelist_content: str = str(custom_reader.get_tag(feed, "whitelist_content", "")).strip()
+    whitelist_author: str = str(custom_reader.get_tag(feed, "whitelist_author", "")).strip()
 
+    # Regex whitelist tags
+    regex_whitelist_title: str = str(custom_reader.get_tag(feed, "regex_whitelist_title", "")).strip()
+    regex_whitelist_summary: str = str(custom_reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
+    regex_whitelist_content: str = str(custom_reader.get_tag(feed, "regex_whitelist_content", "")).strip()
+    regex_whitelist_author: str = str(custom_reader.get_tag(feed, "regex_whitelist_author", "")).strip()
+
+    # Check regular whitelist
     if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title):
         return True
     if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary):
         return True
     if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author):
         return True
-    return bool(
+    if (
         entry.content
         and entry.content[0].value
         and whitelist_content
-        and is_word_in_text(whitelist_content, entry.content[0].value),
+        and is_word_in_text(whitelist_content, entry.content[0].value)
+    ):
+        return True
+
+    # Check regex whitelist
+    if entry.title and regex_whitelist_title and is_regex_match(regex_whitelist_title, entry.title):
+        return True
+    if entry.summary and regex_whitelist_summary and is_regex_match(regex_whitelist_summary, entry.summary):
+        return True
+    if entry.author and regex_whitelist_author and is_regex_match(regex_whitelist_author, entry.author):
+        return True
+    return bool(
+        entry.content
+        and entry.content[0].value
+        and regex_whitelist_content
+        and is_regex_match(regex_whitelist_content, entry.content[0].value),
     )
diff --git a/discord_rss_bot/main.py b/discord_rss_bot/main.py
index 3a1f0ca..a7c6510 100644
--- a/discord_rss_bot/main.py
+++ b/discord_rss_bot/main.py
@@ -43,7 +43,7 @@ from discord_rss_bot.search import create_html_for_search_results
 from discord_rss_bot.settings import get_reader
 
 if TYPE_CHECKING:
-    from collections.abc import Iterable
+    from collections.abc import AsyncGenerator, Iterable
 
     from reader.types import JSONType
 
@@ -88,8 +88,15 @@ reader: Reader = get_reader()
 
 
 @asynccontextmanager
-async def lifespan(app: FastAPI) -> typing.AsyncGenerator[None]:
-    """This is needed for the ASGI server to run."""
+async def lifespan(app: FastAPI) -> AsyncGenerator[None]:
+    """Lifespan for the FastAPI app.
+
+    Args:
+        app: The FastAPI app.
+
+    Yields:
+        None: Nothing.
+    """
     add_missing_tags(reader)
     scheduler: AsyncIOScheduler = AsyncIOScheduler()
 
@@ -250,6 +257,10 @@ async def post_set_whitelist(
     whitelist_summary: Annotated[str, Form()] = "",
     whitelist_content: Annotated[str, Form()] = "",
     whitelist_author: Annotated[str, Form()] = "",
+    regex_whitelist_title: Annotated[str, Form()] = "",
+    regex_whitelist_summary: Annotated[str, Form()] = "",
+    regex_whitelist_content: Annotated[str, Form()] = "",
+    regex_whitelist_author: Annotated[str, Form()] = "",
     feed_url: Annotated[str, Form()] = "",
 ) -> RedirectResponse:
     """Set what the whitelist should be sent, if you have this set only words in the whitelist will be sent.
@@ -259,6 +270,10 @@ async def post_set_whitelist(
         whitelist_summary: Whitelisted words for when checking the summary.
         whitelist_content: Whitelisted words for when checking the content.
         whitelist_author: Whitelisted words for when checking the author.
+        regex_whitelist_title: Whitelisted regex for when checking the title.
+        regex_whitelist_summary: Whitelisted regex for when checking the summary.
+        regex_whitelist_content: Whitelisted regex for when checking the content.
+        regex_whitelist_author: Whitelisted regex for when checking the author.
         feed_url: The feed we should set the whitelist for.
 
     Returns:
@@ -269,6 +284,10 @@ async def post_set_whitelist(
     reader.set_tag(clean_feed_url, "whitelist_summary", whitelist_summary)  # pyright: ignore[reportArgumentType][call-overload]
     reader.set_tag(clean_feed_url, "whitelist_content", whitelist_content)  # pyright: ignore[reportArgumentType][call-overload]
     reader.set_tag(clean_feed_url, "whitelist_author", whitelist_author)  # pyright: ignore[reportArgumentType][call-overload]
+    reader.set_tag(clean_feed_url, "regex_whitelist_title", regex_whitelist_title)  # pyright: ignore[reportArgumentType][call-overload]
+    reader.set_tag(clean_feed_url, "regex_whitelist_summary", regex_whitelist_summary)  # pyright: ignore[reportArgumentType][call-overload]
+    reader.set_tag(clean_feed_url, "regex_whitelist_content", regex_whitelist_content)  # pyright: ignore[reportArgumentType][call-overload]
+    reader.set_tag(clean_feed_url, "regex_whitelist_author", regex_whitelist_author)  # pyright: ignore[reportArgumentType][call-overload]
 
     return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
 
@@ -287,11 +306,14 @@ async def get_whitelist(feed_url: str, request: Request):
     clean_feed_url: str = feed_url.strip()
     feed: Feed = reader.get_feed(urllib.parse.unquote(clean_feed_url))
 
-    # Get previous data, this is used when creating the form.
     whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", ""))
     whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", ""))
     whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", ""))
     whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", ""))
+    regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", ""))
+    regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", ""))
+    regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", ""))
+    regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", ""))
 
     context = {
         "request": request,
@@ -300,6 +322,10 @@ async def get_whitelist(feed_url: str, request: Request):
         "whitelist_summary": whitelist_summary,
         "whitelist_content": whitelist_content,
         "whitelist_author": whitelist_author,
+        "regex_whitelist_title": regex_whitelist_title,
+        "regex_whitelist_summary": regex_whitelist_summary,
+        "regex_whitelist_content": regex_whitelist_content,
+        "regex_whitelist_author": regex_whitelist_author,
     }
     return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
 
@@ -310,6 +336,10 @@ async def post_set_blacklist(
     blacklist_summary: Annotated[str, Form()] = "",
     blacklist_content: Annotated[str, Form()] = "",
     blacklist_author: Annotated[str, Form()] = "",
+    regex_blacklist_title: Annotated[str, Form()] = "",
+    regex_blacklist_summary: Annotated[str, Form()] = "",
+    regex_blacklist_content: Annotated[str, Form()] = "",
+    regex_blacklist_author: Annotated[str, Form()] = "",
     feed_url: Annotated[str, Form()] = "",
 ) -> RedirectResponse:
     """Set the blacklist.
@@ -322,6 +352,10 @@ async def post_set_blacklist(
         blacklist_summary: Blacklisted words for when checking the summary.
         blacklist_content: Blacklisted words for when checking the content.
         blacklist_author: Blacklisted words for when checking the author.
+        regex_blacklist_title: Blacklisted regex for when checking the title.
+        regex_blacklist_summary: Blacklisted regex for when checking the summary.
+        regex_blacklist_content: Blacklisted regex for when checking the content.
+        regex_blacklist_author: Blacklisted regex for when checking the author.
         feed_url: What feed we should set the blacklist for.
 
     Returns:
@@ -332,7 +366,10 @@ async def post_set_blacklist(
     reader.set_tag(clean_feed_url, "blacklist_summary", blacklist_summary)  # pyright: ignore[reportArgumentType][call-overload]
     reader.set_tag(clean_feed_url, "blacklist_content", blacklist_content)  # pyright: ignore[reportArgumentType][call-overload]
     reader.set_tag(clean_feed_url, "blacklist_author", blacklist_author)  # pyright: ignore[reportArgumentType][call-overload]
-
+    reader.set_tag(clean_feed_url, "regex_blacklist_title", regex_blacklist_title)  # pyright: ignore[reportArgumentType][call-overload]
+    reader.set_tag(clean_feed_url, "regex_blacklist_summary", regex_blacklist_summary)  # pyright: ignore[reportArgumentType][call-overload]
+    reader.set_tag(clean_feed_url, "regex_blacklist_content", regex_blacklist_content)  # pyright: ignore[reportArgumentType][call-overload]
+    reader.set_tag(clean_feed_url, "regex_blacklist_author", regex_blacklist_author)  # pyright: ignore[reportArgumentType][call-overload]
     return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
 
 
@@ -349,11 +386,14 @@ async def get_blacklist(feed_url: str, request: Request):
     """
     feed: Feed = reader.get_feed(urllib.parse.unquote(feed_url))
 
-    # Get previous data, this is used when creating the form.
     blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", ""))
     blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", ""))
     blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", ""))
     blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", ""))
+    regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", ""))
+    regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", ""))
+    regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", ""))
+    regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", ""))
 
     context = {
         "request": request,
@@ -362,6 +402,10 @@ async def get_blacklist(feed_url: str, request: Request):
         "blacklist_summary": blacklist_summary,
         "blacklist_content": blacklist_content,
         "blacklist_author": blacklist_author,
+        "regex_blacklist_title": regex_blacklist_title,
+        "regex_blacklist_summary": regex_blacklist_summary,
+        "regex_blacklist_content": regex_blacklist_content,
+        "regex_blacklist_author": regex_blacklist_author,
     }
     return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
 
@@ -461,7 +505,7 @@ async def get_embed_page(feed_url: str, request: Request):
 
 
 @app.post("/embed", response_class=HTMLResponse)
-async def post_embed(  # noqa: PLR0913, PLR0917
+async def post_embed(
     feed_url: Annotated[str, Form()],
     title: Annotated[str, Form()] = "",
     description: Annotated[str, Form()] = "",
diff --git a/discord_rss_bot/templates/blacklist.html b/discord_rss_bot/templates/blacklist.html
index 3632277..ec16bce 100644
--- a/discord_rss_bot/templates/blacklist.html
+++ b/discord_rss_bot/templates/blacklist.html
@@ -42,6 +42,49 @@
                 <label for="blacklist_author" class="col-sm-6 col-form-label">Blacklist - Author</label>
                 <input name="blacklist_author" type="text" class="form-control bg-dark border-dark text-muted"
                     id="blacklist_author" value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
+
+                <div class="mt-4">
+                    <div class="form-text">
+                        <ul class="list-inline">
+                            <li>
+                                Regular expression patterns for advanced filtering. Each pattern should be on a new
+                                line.
+                            </li>
+                            <li>Patterns are case-insensitive.</li>
+                            <li>
+                                Examples:
+                                <code>
+<pre>
+^New Release:.*
+\b(update|version|patch)\s+\d+\.\d+
+.*\[(important|notice)\].*
+</pre>
+                                </code>
+                            </li>
+                        </ul>
+                    </div>
+                    <label for="regex_blacklist_title" class="col-sm-6 col-form-label">Regex Blacklist - Title</label>
+                    <textarea name="regex_blacklist_title" class="form-control bg-dark border-dark text-muted"
+                        id="regex_blacklist_title"
+                        rows="3">{%- if regex_blacklist_title -%}{{ regex_blacklist_title }}{%- endif -%}</textarea>
+
+                    <label for="regex_blacklist_summary" class="col-sm-6 col-form-label">Regex Blacklist -
+                        Summary</label>
+                    <textarea name="regex_blacklist_summary" class="form-control bg-dark border-dark text-muted"
+                        id="regex_blacklist_summary"
+                        rows="3">{%- if regex_blacklist_summary -%}{{ regex_blacklist_summary }}{%- endif -%}</textarea>
+
+                    <label for="regex_blacklist_content" class="col-sm-6 col-form-label">Regex Blacklist -
+                        Content</label>
+                    <textarea name="regex_blacklist_content" class="form-control bg-dark border-dark text-muted"
+                        id="regex_blacklist_content"
+                        rows="3">{%- if regex_blacklist_content -%}{{ regex_blacklist_content }}{%- endif -%}</textarea>
+
+                    <label for="regex_blacklist_author" class="col-sm-6 col-form-label">Regex Blacklist - Author</label>
+                    <textarea name="regex_blacklist_author" class="form-control bg-dark border-dark text-muted"
+                        id="regex_blacklist_author"
+                        rows="3">{%- if regex_blacklist_author -%}{{ regex_blacklist_author }}{%- endif -%}</textarea>
+                </div>
             </div>
         </div>
         <!-- Add a hidden feed_url field to the form -->
diff --git a/discord_rss_bot/templates/whitelist.html b/discord_rss_bot/templates/whitelist.html
index 5a958f6..61755e2 100644
--- a/discord_rss_bot/templates/whitelist.html
+++ b/discord_rss_bot/templates/whitelist.html
@@ -1,6 +1,6 @@
 {% extends "base.html" %}
 {% block title %}
-| Blacklist
+| Whitelist
 {% endblock title %}
 {% block content %}
 <div class="p-2 border border-dark">
@@ -42,6 +42,49 @@
                 <label for="whitelist_author" class="col-sm-6 col-form-label">Whitelist - Author</label>
                 <input name="whitelist_author" type="text" class="form-control bg-dark border-dark text-muted"
                     id="whitelist_author" value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
+
+                <div class="mt-4">
+                    <div class="form-text">
+                        <ul class="list-inline">
+                            <li>
+                                Regular expression patterns for advanced filtering. Each pattern should be on a new
+                                line.
+                            </li>
+                            <li>Patterns are case-insensitive.</li>
+                            <li>
+                                Examples:
+                                <code>
+<pre>
+^New Release:.*
+\b(update|version|patch)\s+\d+\.\d+
+.*\[(important|notice)\].*
+</pre>
+                                </code>
+                            </li>
+                        </ul>
+                    </div>
+                    <label for="regex_whitelist_title" class="col-sm-6 col-form-label">Regex Whitelist - Title</label>
+                    <textarea name="regex_whitelist_title" class="form-control bg-dark border-dark text-muted"
+                        id="regex_whitelist_title"
+                        rows="3">{%- if regex_whitelist_title -%}{{ regex_whitelist_title }}{%- endif -%}</textarea>
+
+                    <label for="regex_whitelist_summary" class="col-sm-6 col-form-label">Regex Whitelist -
+                        Summary</label>
+                    <textarea name="regex_whitelist_summary" class="form-control bg-dark border-dark text-muted"
+                        id="regex_whitelist_summary"
+                        rows="3">{%- if regex_whitelist_summary -%}{{ regex_whitelist_summary }}{%- endif -%}</textarea>
+
+                    <label for="regex_whitelist_content" class="col-sm-6 col-form-label">Regex Whitelist -
+                        Content</label>
+                    <textarea name="regex_whitelist_content" class="form-control bg-dark border-dark text-muted"
+                        id="regex_whitelist_content"
+                        rows="3">{%- if regex_whitelist_content -%}{{ regex_whitelist_content }}{%- endif -%}</textarea>
+
+                    <label for="regex_whitelist_author" class="col-sm-6 col-form-label">Regex Whitelist - Author</label>
+                    <textarea name="regex_whitelist_author" class="form-control bg-dark border-dark text-muted"
+                        id="regex_whitelist_author"
+                        rows="3">{%- if regex_whitelist_author -%}{{ regex_whitelist_author }}{%- endif -%}</textarea>
+                </div>
             </div>
         </div>
         <!-- Add a hidden feed_url field to the form -->
diff --git a/pyproject.toml b/pyproject.toml
index 4cda1f6..21ab35a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,7 @@ platformdirs = "*"
 python-dotenv = "*"
 python-multipart = "*"
 reader = "*"
-sentry-sdk = {version = "*", extras = ["fastapi"]}
+sentry-sdk = { version = "*", extras = ["fastapi"] }
 uvicorn = "*"
 
 [tool.poetry.group.dev.dependencies]
@@ -86,6 +86,8 @@ lint.ignore = [
     "PLR6301", # Checks for the presence of unused self parameter in methods definitions.
     "RUF029",  # Checks for functions declared async that do not await or otherwise use features requiring the function to be declared async.
     "TD003",   # Checks that a TODO comment is associated with a link to a relevant issue or ticket.
+    "PLR0913", # Checks for function definitions that include too many arguments.
+    "PLR0917", # Checks for function definitions that include too many positional arguments.
 
     # Conflicting lint rules when using Ruff's formatter
     # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
diff --git a/tests/test_blacklist.py b/tests/test_blacklist.py
index 4f5a317..d2a785b 100644
--- a/tests/test_blacklist.py
+++ b/tests/test_blacklist.py
@@ -39,6 +39,13 @@ def test_has_black_tags() -> None:
     check_if_has_tag(reader, feed, "blacklist_title")
     check_if_has_tag(reader, feed, "blacklist_summary")
     check_if_has_tag(reader, feed, "blacklist_content")
+    check_if_has_tag(reader, feed, "blacklist_author")
+
+    # Test regex blacklist tags
+    check_if_has_tag(reader, feed, "regex_blacklist_title")
+    check_if_has_tag(reader, feed, "regex_blacklist_summary")
+    check_if_has_tag(reader, feed, "regex_blacklist_content")
+    check_if_has_tag(reader, feed, "regex_blacklist_author")
 
     # Clean up
     reader.delete_feed(feed_url)
@@ -74,6 +81,7 @@ def test_should_be_skipped() -> None:
     # Test entry without any blacklists
     assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
 
+    # Test standard blacklist functionality
     reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd")  # pyright: ignore[reportArgumentType]
     assert entry_should_be_skipped(reader, first_entry[0]) is True, f"Entry should be skipped: {first_entry[0]}"
     reader.delete_tag(feed, "blacklist_title")
@@ -113,3 +121,81 @@ def test_should_be_skipped() -> None:
     assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
     reader.delete_tag(feed, "blacklist_author")
     assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+
+def test_regex_should_be_skipped() -> None:
+    """Check that entry_should_be_skipped honours each regex_blacklist_* feed tag."""
+    reader: Reader = get_reader()
+
+    # Register the feed and update it so there are entries to filter
+    reader.add_feed(feed_url)
+    feed: Feed = reader.get_feed(feed_url)
+    reader.update_feeds()
+
+    # Keep only the first entry; the loop breaks after a single iteration
+    first_entry: list[Entry] = []
+    entries: Iterable[Entry] = reader.get_entries(feed=feed)
+    assert entries is not None, f"Entries should not be None: {entries}"
+    for entry in entries:
+        first_entry.append(entry)
+        break
+    assert len(first_entry) == 1, f"First entry should be added: {first_entry}"
+
+    # Baseline: with no regex blacklist tag set, the entry must not be skipped
+    assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+    # Title: setting regex_blacklist_title skips the entry; deleting the tag un-skips it
+    reader.set_tag(feed, "regex_blacklist_title", r"fvnnn\w+")  # pyright: ignore[reportArgumentType]
+    assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+        f"Entry should be skipped with regex title match: {first_entry[0]}"
+    )
+    reader.delete_tag(feed, "regex_blacklist_title")
+    assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+    # Summary: same set / assert skipped / delete / assert not skipped cycle
+    reader.set_tag(feed, "regex_blacklist_summary", r"ffdnfdn\w+")  # pyright: ignore[reportArgumentType]
+    assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+        f"Entry should be skipped with regex summary match: {first_entry[0]}"
+    )
+    reader.delete_tag(feed, "regex_blacklist_summary")
+    assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+    # Content: same cycle using regex_blacklist_content
+    reader.set_tag(feed, "regex_blacklist_content", r"ffdnfdnfdn\w+")  # pyright: ignore[reportArgumentType]
+    assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+        f"Entry should be skipped with regex content match: {first_entry[0]}"
+    )
+    reader.delete_tag(feed, "regex_blacklist_content")
+    assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+    # Author: same cycle using regex_blacklist_author
+    reader.set_tag(feed, "regex_blacklist_author", r"TheLovinator\d*")  # pyright: ignore[reportArgumentType]
+    assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+        f"Entry should be skipped with regex author match: {first_entry[0]}"
+    )
+    reader.delete_tag(feed, "regex_blacklist_author")
+    assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+    # Invalid pattern (unclosed "["): must not raise and must not cause the entry to be skipped
+    reader.set_tag(feed, "regex_blacklist_title", r"[incomplete")  # pyright: ignore[reportArgumentType]
+    assert entry_should_be_skipped(reader, first_entry[0]) is False, (
+        f"Entry should not be skipped with invalid regex: {first_entry[0]}"
+    )
+    reader.delete_tag(feed, "regex_blacklist_title")
+
+    # Comma-separated list: one matching pattern among several is enough to skip the entry
+    reader.set_tag(feed, "regex_blacklist_author", r"pattern1,TheLovinator\d*,pattern3")  # pyright: ignore[reportArgumentType]
+    assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+        f"Entry should be skipped with one matching pattern in list: {first_entry[0]}"
+    )
+    reader.delete_tag(feed, "regex_blacklist_author")
+    assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+    # Newline-separated list: same three patterns as above, one per line
+    newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
+    reader.set_tag(feed, "regex_blacklist_author", newline_patterns)  # pyright: ignore[reportArgumentType]
+    assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+        f"Entry should be skipped with newline-separated patterns: {first_entry[0]}"
+    )
+    reader.delete_tag(feed, "regex_blacklist_author")
+    assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 0bccb6b..5274eb8 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from discord_rss_bot.filter.utils import is_word_in_text
+from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
 
 
 def test_is_word_in_text() -> None:
@@ -14,3 +14,51 @@ def test_is_word_in_text() -> None:
     assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
     assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
     assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false
+
+
+def test_is_regex_match() -> None:  # Exercises is_regex_match(patterns, text) directly, without a reader
+    msg_true = "Should return True"
+    msg_false = "Should return False"
+
+    # Basic patterns: a digit class plus ^ and $ anchors, each satisfied by the sample text
+    assert is_regex_match(r"word\d+", "This text contains word123") is True, msg_true
+    assert is_regex_match(r"^Hello", "Hello world") is True, msg_true
+    assert is_regex_match(r"world$", "Hello world") is True, msg_true
+
+    # Case is expected to be ignored: lowercase pattern against uppercase text
+    assert is_regex_match(r"hello", "This text contains HELLO") is True, msg_true
+
+    # Comma-separated patterns (with and without a space after the comma): any one match is enough
+    assert is_regex_match(r"pattern1,pattern2", "This contains pattern2") is True, msg_true
+    assert is_regex_match(r"pattern1, pattern2", "This contains pattern1") is True, msg_true
+
+    # Anchored patterns must not match when the word appears away from the anchored position
+    assert is_regex_match(r"^start", "This doesn't start with the pattern") is False, msg_false
+    assert is_regex_match(r"end$", "This doesn't end with the pattern") is False, msg_false
+
+    # An empty pattern string or an empty text is expected to yield False
+    assert is_regex_match("", "Some text") is False, msg_false
+    assert is_regex_match("pattern", "") is False, msg_false
+
+    # An invalid regex (unclosed "[") is expected to return False rather than raise
+    assert is_regex_match(r"[incomplete", "Some text") is False, msg_false
+
+    # An invalid pattern in the list must not stop the remaining patterns from matching
+    assert is_regex_match(r"valid, [invalid, \w+", "Contains word") is True, msg_true
+
+    # Newline-separated patterns: each of the three can match on its own; unrelated text matches none
+    newline_patterns = "pattern1\n^start\ncontains\\d+"
+    assert is_regex_match(newline_patterns, "This contains123 text") is True, msg_true
+    assert is_regex_match(newline_patterns, "start of line") is True, msg_true
+    assert is_regex_match(newline_patterns, "pattern1 is here") is True, msg_true
+    assert is_regex_match(newline_patterns, "None of these match") is False, msg_false
+
+    # Newlines and commas mixed in one string (for backward compatibility): both act as separators
+    mixed_patterns = "pattern1\npattern2,pattern3\npattern4"
+    assert is_regex_match(mixed_patterns, "Contains pattern3") is True, msg_true
+    assert is_regex_match(mixed_patterns, "Contains pattern4") is True, msg_true
+
+    # Blank and whitespace-only lines in the pattern string: the real patterns are still expected to match
+    whitespace_patterns = "\\s+\n \n\npattern\n\n"
+    assert is_regex_match(whitespace_patterns, "text with    spaces") is True, msg_true
+    assert is_regex_match(whitespace_patterns, "text with pattern") is True, msg_true
diff --git a/tests/test_whitelist.py b/tests/test_whitelist.py
index cf39aa0..9fbb712 100644
--- a/tests/test_whitelist.py
+++ b/tests/test_whitelist.py
@@ -38,6 +38,13 @@ def test_has_white_tags() -> None:
     check_if_has_tag(reader, feed, "whitelist_title")
     check_if_has_tag(reader, feed, "whitelist_summary")
     check_if_has_tag(reader, feed, "whitelist_content")
+    check_if_has_tag(reader, feed, "whitelist_author")
+
+    # Test regex whitelist tags
+    check_if_has_tag(reader, feed, "regex_whitelist_title")
+    check_if_has_tag(reader, feed, "regex_whitelist_summary")
+    check_if_has_tag(reader, feed, "regex_whitelist_content")
+    check_if_has_tag(reader, feed, "regex_whitelist_author")
 
     # Clean up
     reader.delete_feed(feed_url)
@@ -109,3 +116,67 @@ def test_should_be_sent() -> None:
     assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
     reader.delete_tag(feed, "whitelist_author")
     assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+
+def test_regex_should_be_sent() -> None:
+    """Check that should_be_sent honours each regex_whitelist_* feed tag."""
+    reader: Reader = get_reader()
+
+    # Register the feed and update it so there are entries to filter
+    reader.add_feed(feed_url)
+    feed: Feed = reader.get_feed(feed_url)
+    reader.update_feeds()
+
+    # Keep only the first entry; the loop breaks after a single iteration
+    first_entry: list[Entry] = []
+    entries: Iterable[Entry] = reader.get_entries(feed=feed)
+    assert entries is not None, "Entries should not be None"
+    for entry in entries:
+        first_entry.append(entry)
+        break
+    assert len(first_entry) == 1, "First entry should be added"
+
+    # Baseline: with no regex whitelist tag set, should_be_sent is expected to return False
+    assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+    # Title: setting regex_whitelist_title makes the entry sendable; deleting the tag reverts that
+    reader.set_tag(feed, "regex_whitelist_title", r"fvnnn\w+")  # pyright: ignore[reportArgumentType]
+    assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex title match"
+    reader.delete_tag(feed, "regex_whitelist_title")
+    assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+    # Summary: same set / assert sent / delete / assert not sent cycle
+    reader.set_tag(feed, "regex_whitelist_summary", r"ffdnfdn\w+")  # pyright: ignore[reportArgumentType]
+    assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex summary match"
+    reader.delete_tag(feed, "regex_whitelist_summary")
+    assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+    # Content: same cycle using regex_whitelist_content
+    reader.set_tag(feed, "regex_whitelist_content", r"ffdnfdnfdn\w+")  # pyright: ignore[reportArgumentType]
+    assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex content match"
+    reader.delete_tag(feed, "regex_whitelist_content")
+    assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+    # Author: same cycle using regex_whitelist_author
+    reader.set_tag(feed, "regex_whitelist_author", r"TheLovinator\d*")  # pyright: ignore[reportArgumentType]
+    assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex author match"
+    reader.delete_tag(feed, "regex_whitelist_author")
+    assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+    # Invalid pattern (unclosed "["): must not raise and must not whitelist the entry
+    reader.set_tag(feed, "regex_whitelist_title", r"[incomplete")  # pyright: ignore[reportArgumentType]
+    assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent with invalid regex"
+    reader.delete_tag(feed, "regex_whitelist_title")
+
+    # Comma-separated list: one matching pattern among several is enough to send the entry
+    reader.set_tag(feed, "regex_whitelist_author", r"pattern1,TheLovinator\d*,pattern3")  # pyright: ignore[reportArgumentType]
+    assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with one matching pattern in list"
+    reader.delete_tag(feed, "regex_whitelist_author")
+    assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+    # Newline-separated list: same three patterns as above, one per line
+    newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
+    reader.set_tag(feed, "regex_whitelist_author", newline_patterns)  # pyright: ignore[reportArgumentType]
+    assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns"
+    reader.delete_tag(feed, "regex_whitelist_author")
+    assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"