Add domain-wide blacklist and whitelist functionality
This commit is contained in:
parent
aa8a74ba67
commit
bdbd46ebd4
14 changed files with 930 additions and 305 deletions
|
|
@ -711,15 +711,14 @@ def send_to_discord(reader: Reader | None = None, feed: Feed | None = None, *, d
|
||||||
use_default_message_on_empty=True,
|
use_default_message_on_empty=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check if the entry is blacklisted, and if it is, we will skip it.
|
# Whitelist should take precedence when configured.
|
||||||
if entry_should_be_skipped(effective_reader, entry):
|
if has_white_tags(effective_reader, entry.feed):
|
||||||
logger.info("Entry was blacklisted: %s", entry.id)
|
if not should_be_sent(effective_reader, entry):
|
||||||
continue
|
|
||||||
|
|
||||||
# Check if the feed has a whitelist, and if it does, check if the entry is whitelisted.
|
|
||||||
if has_white_tags(effective_reader, entry.feed) and not should_be_sent(effective_reader, entry):
|
|
||||||
logger.info("Entry was not whitelisted: %s", entry.id)
|
logger.info("Entry was not whitelisted: %s", entry.id)
|
||||||
continue
|
continue
|
||||||
|
elif entry_should_be_skipped(effective_reader, entry):
|
||||||
|
logger.info("Entry was blacklisted: %s", entry.id)
|
||||||
|
continue
|
||||||
|
|
||||||
# Use a custom webhook for Hoyolab feeds.
|
# Use a custom webhook for Hoyolab feeds.
|
||||||
if is_c3kay_feed(entry.feed.url):
|
if is_c3kay_feed(entry.feed.url):
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from discord_rss_bot.filter.utils import get_domain_filter_tags
|
||||||
from discord_rss_bot.filter.utils import is_regex_match
|
from discord_rss_bot.filter.utils import is_regex_match
|
||||||
from discord_rss_bot.filter.utils import is_word_in_text
|
from discord_rss_bot.filter.utils import is_word_in_text
|
||||||
|
|
||||||
|
|
@ -11,6 +12,37 @@ if TYPE_CHECKING:
|
||||||
from reader import Reader
|
from reader import Reader
|
||||||
|
|
||||||
|
|
||||||
|
_MATCH_FIELDS: tuple[str, ...] = ("title", "summary", "content", "author")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_effective_blacklist_values(reader: Reader, feed: Feed) -> tuple[dict[str, str], dict[str, str]]:
    """Combine the feed's own blacklist rules with the rules stored for its domain.

    For every name in ``_MATCH_FIELDS`` the feed tag ``blacklist_<field>`` /
    ``regex_blacklist_<field>`` is read from the reader and the entry of the same
    name is looked up in the ``domain_blacklist`` mapping returned by
    ``get_domain_filter_tags``. Every value is stringified and stripped.

    Args:
        reader: Reader instance the feed tags are read from.
        feed: Feed whose effective blacklist is wanted.

    Returns:
        A ``(plain, regex)`` pair of dictionaries keyed by field name. Plain
        values are the non-empty feed and domain rules joined with ``","``;
        regex values are joined with a newline. A field with no rules maps to
        an empty string.
    """

    def _clean(raw: object) -> str:
        # Tag values are not guaranteed to be strings, so normalise them first.
        return str(raw).strip()

    def _combine(separator: str, feed_rule: str, domain_rule: str) -> str:
        # Empty rules are left out so no dangling separator is produced.
        return separator.join(rule for rule in (feed_rule, domain_rule) if rule)

    feed_words: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        feed_words[name] = _clean(reader.get_tag(feed, f"blacklist_{name}", ""))

    feed_patterns: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        feed_patterns[name] = _clean(reader.get_tag(feed, f"regex_blacklist_{name}", ""))

    domain_rules: dict[str, str] = get_domain_filter_tags(reader, feed, "domain_blacklist")

    words: dict[str, str] = {}
    patterns: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        domain_word: str = _clean(domain_rules.get(f"blacklist_{name}", ""))
        domain_pattern: str = _clean(domain_rules.get(f"regex_blacklist_{name}", ""))
        words[name] = _combine(",", feed_words[name], domain_word)
        patterns[name] = _combine("\n", feed_patterns[name], domain_pattern)

    return words, patterns
|
||||||
|
|
||||||
|
|
||||||
def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool:
|
def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool:
|
||||||
"""Return True if the feed has blacklist tags.
|
"""Return True if the feed has blacklist tags.
|
||||||
|
|
||||||
|
|
@ -31,26 +63,8 @@ def feed_has_blacklist_tags(reader: Reader, feed: Feed) -> bool:
|
||||||
Returns:
|
Returns:
|
||||||
bool: If the feed has any of the tags.
|
bool: If the feed has any of the tags.
|
||||||
"""
|
"""
|
||||||
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip()
|
merged_values, merged_regex_values = _get_effective_blacklist_values(reader, feed)
|
||||||
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip()
|
return any(merged_values.values()) or any(merged_regex_values.values())
|
||||||
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip()
|
|
||||||
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip()
|
|
||||||
|
|
||||||
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip()
|
|
||||||
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip()
|
|
||||||
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
|
|
||||||
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip()
|
|
||||||
|
|
||||||
return bool(
|
|
||||||
blacklist_title
|
|
||||||
or blacklist_author
|
|
||||||
or blacklist_content
|
|
||||||
or blacklist_summary
|
|
||||||
or regex_blacklist_author
|
|
||||||
or regex_blacklist_content
|
|
||||||
or regex_blacklist_summary
|
|
||||||
or regex_blacklist_title,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
|
def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
|
||||||
|
|
@ -63,58 +77,55 @@ def entry_should_be_skipped(reader: Reader, entry: Entry) -> bool: # noqa: PLR0
|
||||||
Returns:
|
Returns:
|
||||||
bool: If the entry is in the blacklist.
|
bool: If the entry is in the blacklist.
|
||||||
"""
|
"""
|
||||||
feed = entry.feed
|
merged_values, merged_regex_values = _get_effective_blacklist_values(reader, entry.feed)
|
||||||
|
|
||||||
blacklist_title: str = str(reader.get_tag(feed, "blacklist_title", "")).strip()
|
|
||||||
blacklist_summary: str = str(reader.get_tag(feed, "blacklist_summary", "")).strip()
|
|
||||||
blacklist_content: str = str(reader.get_tag(feed, "blacklist_content", "")).strip()
|
|
||||||
blacklist_author: str = str(reader.get_tag(feed, "blacklist_author", "")).strip()
|
|
||||||
|
|
||||||
regex_blacklist_title: str = str(reader.get_tag(feed, "regex_blacklist_title", "")).strip()
|
|
||||||
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", "")).strip()
|
|
||||||
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", "")).strip()
|
|
||||||
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", "")).strip()
|
|
||||||
# TODO(TheLovinator): Also add support for entry_text and more.
|
# TODO(TheLovinator): Also add support for entry_text and more.
|
||||||
|
|
||||||
# Check regular blacklist
|
# Check regular blacklist
|
||||||
if entry.title and blacklist_title and is_word_in_text(blacklist_title, entry.title):
|
if entry.title and merged_values["title"] and is_word_in_text(merged_values["title"], entry.title):
|
||||||
return True
|
return True
|
||||||
if entry.summary and blacklist_summary and is_word_in_text(blacklist_summary, entry.summary):
|
if entry.summary and merged_values["summary"] and is_word_in_text(merged_values["summary"], entry.summary):
|
||||||
return True
|
return True
|
||||||
if (
|
if (
|
||||||
entry.content
|
entry.content
|
||||||
and entry.content[0].value
|
and entry.content[0].value
|
||||||
and blacklist_content
|
and merged_values["content"]
|
||||||
and is_word_in_text(blacklist_content, entry.content[0].value)
|
and is_word_in_text(merged_values["content"], entry.content[0].value)
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
if entry.author and blacklist_author and is_word_in_text(blacklist_author, entry.author):
|
if entry.author and merged_values["author"] and is_word_in_text(merged_values["author"], entry.author):
|
||||||
return True
|
return True
|
||||||
if (
|
if (
|
||||||
entry.content
|
entry.content
|
||||||
and entry.content[0].value
|
and entry.content[0].value
|
||||||
and blacklist_content
|
and merged_values["content"]
|
||||||
and is_word_in_text(blacklist_content, entry.content[0].value)
|
and is_word_in_text(merged_values["content"], entry.content[0].value)
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Check regex blacklist
|
# Check regex blacklist
|
||||||
if entry.title and regex_blacklist_title and is_regex_match(regex_blacklist_title, entry.title):
|
if entry.title and merged_regex_values["title"] and is_regex_match(merged_regex_values["title"], entry.title):
|
||||||
return True
|
return True
|
||||||
if entry.summary and regex_blacklist_summary and is_regex_match(regex_blacklist_summary, entry.summary):
|
if (
|
||||||
|
entry.summary
|
||||||
|
and merged_regex_values["summary"]
|
||||||
|
and is_regex_match(
|
||||||
|
merged_regex_values["summary"],
|
||||||
|
entry.summary,
|
||||||
|
)
|
||||||
|
):
|
||||||
return True
|
return True
|
||||||
if (
|
if (
|
||||||
entry.content
|
entry.content
|
||||||
and entry.content[0].value
|
and entry.content[0].value
|
||||||
and regex_blacklist_content
|
and merged_regex_values["content"]
|
||||||
and is_regex_match(regex_blacklist_content, entry.content[0].value)
|
and is_regex_match(merged_regex_values["content"], entry.content[0].value)
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
if entry.author and regex_blacklist_author and is_regex_match(regex_blacklist_author, entry.author):
|
if entry.author and merged_regex_values["author"] and is_regex_match(merged_regex_values["author"], entry.author):
|
||||||
return True
|
return True
|
||||||
return bool(
|
return bool(
|
||||||
entry.content
|
entry.content
|
||||||
and entry.content[0].value
|
and entry.content[0].value
|
||||||
and regex_blacklist_content
|
and merged_regex_values["content"]
|
||||||
and is_regex_match(regex_blacklist_content, entry.content[0].value),
|
and is_regex_match(merged_regex_values["content"], entry.content[0].value),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,14 @@ from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import tldextract
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from reader import Feed
|
||||||
|
from reader import Reader
|
||||||
|
|
||||||
logger: logging.Logger = logging.getLogger(__name__)
|
logger: logging.Logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -70,3 +78,53 @@ def is_regex_match(regex_string: str, text: str) -> bool:
|
||||||
logger.info("No regex patterns matched.")
|
logger.info("No regex patterns matched.")
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_domain_key(url: str) -> str:
    """Return a normalized domain key used for domain-wide filters.

    Only the host part of the URL is used. Credentials (``user:pass@``) and the
    port are never part of the key, including when the host has no registrable
    domain (for example ``localhost`` or an IP address) and the plain host is
    returned as a fallback.

    Args:
        url: The URL to extract the domain from.

    Returns:
        str: A normalized domain key (e.g. ``example.com``). An empty string is
        returned when ``url`` is empty, has no host, or cannot be parsed.
    """
    if not url:
        return ""

    try:
        # ``hostname`` is already lower-cased and excludes userinfo and port.
        hostname: str | None = urlparse(url).hostname
    except ValueError:
        # urlparse rejects malformed hosts such as an unterminated IPv6 literal;
        # treat those like any other URL without a usable host.
        return ""

    host: str = (hostname or "").strip().removeprefix("www.")
    if not host:
        return ""

    ext = tldextract.extract(host)
    top_domain: str = ext.top_domain_under_public_suffix
    # Fall back to the bare host when no registrable domain was found.
    return top_domain or host
|
||||||
|
|
||||||
|
|
||||||
|
def get_domain_filter_tags(reader: Reader, feed: Feed, tag_name: str) -> dict[str, str]:
    """Return domain-wide filter tags for a feed.

    The global tag ``tag_name`` is expected to hold a mapping of domain key to a
    mapping of filter name to filter value. Anything that does not have that
    shape is treated as "no domain filters".

    Args:
        reader: Reader instance.
        feed: Feed instance.
        tag_name: Global tag name that stores domain filters.

    Returns:
        dict[str, str]: Domain filter values for the feed's domain. Values are
        stringified; entries with a non-string key or a ``None`` value are
        dropped. Empty when the feed has no domain key or nothing is stored.
    """
    domain_key: str = get_domain_key(str(feed.url))
    if not domain_key:
        return {}

    domain_filters: object = reader.get_tag((), tag_name, {})
    if not isinstance(domain_filters, dict):
        return {}

    values: object = domain_filters.get(domain_key, {})
    if not isinstance(values, dict):
        return {}

    # A stored null means "not configured"; stringifying it would turn it into
    # the literal filter term "None".
    return {key: str(value) for key, value in values.items() if isinstance(key, str) and value is not None}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from discord_rss_bot.filter.utils import get_domain_filter_tags
|
||||||
from discord_rss_bot.filter.utils import is_regex_match
|
from discord_rss_bot.filter.utils import is_regex_match
|
||||||
from discord_rss_bot.filter.utils import is_word_in_text
|
from discord_rss_bot.filter.utils import is_word_in_text
|
||||||
|
|
||||||
|
|
@ -11,6 +12,37 @@ if TYPE_CHECKING:
|
||||||
from reader import Reader
|
from reader import Reader
|
||||||
|
|
||||||
|
|
||||||
|
_MATCH_FIELDS: tuple[str, ...] = ("title", "summary", "content", "author")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_effective_whitelist_values(reader: Reader, feed: Feed) -> tuple[dict[str, str], dict[str, str]]:
    """Combine the feed's own whitelist rules with the rules stored for its domain.

    For every name in ``_MATCH_FIELDS`` the feed tag ``whitelist_<field>`` /
    ``regex_whitelist_<field>`` is read from the reader and the entry of the same
    name is looked up in the ``domain_whitelist`` mapping returned by
    ``get_domain_filter_tags``. Every value is stringified and stripped.

    Args:
        reader: Reader instance the feed tags are read from.
        feed: Feed whose effective whitelist is wanted.

    Returns:
        A ``(plain, regex)`` pair of dictionaries keyed by field name. Plain
        values are the non-empty feed and domain rules joined with ``","``;
        regex values are joined with a newline. A field with no rules maps to
        an empty string.
    """

    def _clean(raw: object) -> str:
        # Tag values are not guaranteed to be strings, so normalise them first.
        return str(raw).strip()

    def _combine(separator: str, feed_rule: str, domain_rule: str) -> str:
        # Empty rules are left out so no dangling separator is produced.
        return separator.join(rule for rule in (feed_rule, domain_rule) if rule)

    feed_words: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        feed_words[name] = _clean(reader.get_tag(feed, f"whitelist_{name}", ""))

    feed_patterns: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        feed_patterns[name] = _clean(reader.get_tag(feed, f"regex_whitelist_{name}", ""))

    domain_rules: dict[str, str] = get_domain_filter_tags(reader, feed, "domain_whitelist")

    words: dict[str, str] = {}
    patterns: dict[str, str] = {}
    for name in _MATCH_FIELDS:
        domain_word: str = _clean(domain_rules.get(f"whitelist_{name}", ""))
        domain_pattern: str = _clean(domain_rules.get(f"regex_whitelist_{name}", ""))
        words[name] = _combine(",", feed_words[name], domain_word)
        patterns[name] = _combine("\n", feed_patterns[name], domain_pattern)

    return words, patterns
|
||||||
|
|
||||||
|
|
||||||
def has_white_tags(reader: Reader, feed: Feed) -> bool:
|
def has_white_tags(reader: Reader, feed: Feed) -> bool:
|
||||||
"""Return True if the feed has whitelist tags.
|
"""Return True if the feed has whitelist tags.
|
||||||
|
|
||||||
|
|
@ -31,26 +63,8 @@ def has_white_tags(reader: Reader, feed: Feed) -> bool:
|
||||||
Returns:
|
Returns:
|
||||||
bool: If the feed has any of the tags.
|
bool: If the feed has any of the tags.
|
||||||
"""
|
"""
|
||||||
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip()
|
merged_values, merged_regex_values = _get_effective_whitelist_values(reader, feed)
|
||||||
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip()
|
return any(merged_values.values()) or any(merged_regex_values.values())
|
||||||
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip()
|
|
||||||
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip()
|
|
||||||
|
|
||||||
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip()
|
|
||||||
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
|
|
||||||
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip()
|
|
||||||
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip()
|
|
||||||
|
|
||||||
return bool(
|
|
||||||
whitelist_title
|
|
||||||
or whitelist_author
|
|
||||||
or whitelist_content
|
|
||||||
or whitelist_summary
|
|
||||||
or regex_whitelist_author
|
|
||||||
or regex_whitelist_content
|
|
||||||
or regex_whitelist_summary
|
|
||||||
or regex_whitelist_title,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
|
def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
|
||||||
|
|
@ -63,44 +77,40 @@ def should_be_sent(reader: Reader, entry: Entry) -> bool: # noqa: PLR0911
|
||||||
Returns:
|
Returns:
|
||||||
bool: If the entry is in the whitelist.
|
bool: If the entry is in the whitelist.
|
||||||
"""
|
"""
|
||||||
feed: Feed = entry.feed
|
merged_values, merged_regex_values = _get_effective_whitelist_values(reader, entry.feed)
|
||||||
# Regular whitelist tags
|
|
||||||
whitelist_title: str = str(reader.get_tag(feed, "whitelist_title", "")).strip()
|
|
||||||
whitelist_summary: str = str(reader.get_tag(feed, "whitelist_summary", "")).strip()
|
|
||||||
whitelist_content: str = str(reader.get_tag(feed, "whitelist_content", "")).strip()
|
|
||||||
whitelist_author: str = str(reader.get_tag(feed, "whitelist_author", "")).strip()
|
|
||||||
|
|
||||||
# Regex whitelist tags
|
|
||||||
regex_whitelist_title: str = str(reader.get_tag(feed, "regex_whitelist_title", "")).strip()
|
|
||||||
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", "")).strip()
|
|
||||||
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", "")).strip()
|
|
||||||
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", "")).strip()
|
|
||||||
|
|
||||||
# Check regular whitelist
|
# Check regular whitelist
|
||||||
if entry.title and whitelist_title and is_word_in_text(whitelist_title, entry.title):
|
if entry.title and merged_values["title"] and is_word_in_text(merged_values["title"], entry.title):
|
||||||
return True
|
return True
|
||||||
if entry.summary and whitelist_summary and is_word_in_text(whitelist_summary, entry.summary):
|
if entry.summary and merged_values["summary"] and is_word_in_text(merged_values["summary"], entry.summary):
|
||||||
return True
|
return True
|
||||||
if entry.author and whitelist_author and is_word_in_text(whitelist_author, entry.author):
|
if entry.author and merged_values["author"] and is_word_in_text(merged_values["author"], entry.author):
|
||||||
return True
|
return True
|
||||||
if (
|
if (
|
||||||
entry.content
|
entry.content
|
||||||
and entry.content[0].value
|
and entry.content[0].value
|
||||||
and whitelist_content
|
and merged_values["content"]
|
||||||
and is_word_in_text(whitelist_content, entry.content[0].value)
|
and is_word_in_text(merged_values["content"], entry.content[0].value)
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Check regex whitelist
|
# Check regex whitelist
|
||||||
if entry.title and regex_whitelist_title and is_regex_match(regex_whitelist_title, entry.title):
|
if entry.title and merged_regex_values["title"] and is_regex_match(merged_regex_values["title"], entry.title):
|
||||||
return True
|
return True
|
||||||
if entry.summary and regex_whitelist_summary and is_regex_match(regex_whitelist_summary, entry.summary):
|
if (
|
||||||
|
entry.summary
|
||||||
|
and merged_regex_values["summary"]
|
||||||
|
and is_regex_match(
|
||||||
|
merged_regex_values["summary"],
|
||||||
|
entry.summary,
|
||||||
|
)
|
||||||
|
):
|
||||||
return True
|
return True
|
||||||
if entry.author and regex_whitelist_author and is_regex_match(regex_whitelist_author, entry.author):
|
if entry.author and merged_regex_values["author"] and is_regex_match(merged_regex_values["author"], entry.author):
|
||||||
return True
|
return True
|
||||||
return bool(
|
return bool(
|
||||||
entry.content
|
entry.content
|
||||||
and entry.content[0].value
|
and entry.content[0].value
|
||||||
and regex_whitelist_content
|
and merged_regex_values["content"]
|
||||||
and is_regex_match(regex_whitelist_content, entry.content[0].value),
|
and is_regex_match(merged_regex_values["content"], entry.content[0].value),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ import shutil
|
||||||
import subprocess # noqa: S404
|
import subprocess # noqa: S404
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Any
|
from typing import cast
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from reader import Reader
|
from reader import Reader
|
||||||
|
|
@ -37,11 +37,8 @@ logger: logging.Logger = logging.getLogger(__name__)
|
||||||
GIT_EXECUTABLE: str = shutil.which("git") or "git"
|
GIT_EXECUTABLE: str = shutil.which("git") or "git"
|
||||||
|
|
||||||
|
|
||||||
type TAG_VALUE = (
|
type JsonScalar = str | int | float | bool | None
|
||||||
dict[str, str | int | float | bool | dict[str, Any] | list[Any] | None]
|
type JsonLike = JsonScalar | dict[str, JsonLike] | list[JsonLike]
|
||||||
| list[str | int | float | bool | dict[str, Any] | list[Any] | None]
|
|
||||||
| None
|
|
||||||
)
|
|
||||||
|
|
||||||
# Tags that are exported per-feed (empty values are omitted).
|
# Tags that are exported per-feed (empty values are omitted).
|
||||||
_FEED_TAGS: tuple[str, ...] = (
|
_FEED_TAGS: tuple[str, ...] = (
|
||||||
|
|
@ -157,47 +154,68 @@ def setup_backup_repo(backup_path: Path) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def export_state(reader: Reader, backup_path: Path) -> None:
|
def _build_feed_state(reader: Reader) -> list[JsonLike]:
|
||||||
"""Serialise the current bot state to ``state.json`` inside *backup_path*.
|
"""Collect feed and per-feed tag state.
|
||||||
|
|
||||||
Args:
|
Returns:
|
||||||
reader: The :class:`reader.Reader` instance to read state from.
|
A list of dictionaries containing feed URLs and their associated tag values.
|
||||||
backup_path: Destination directory for the exported ``state.json``.
|
|
||||||
"""
|
"""
|
||||||
feeds_state: list[dict] = []
|
feeds_state: list[JsonLike] = []
|
||||||
for feed in reader.get_feeds():
|
for feed in reader.get_feeds():
|
||||||
feed_data: dict = {"url": feed.url}
|
feed_data: dict[str, JsonLike] = {"url": feed.url}
|
||||||
for tag in _FEED_TAGS:
|
for tag in _FEED_TAGS:
|
||||||
try:
|
try:
|
||||||
value: TAG_VALUE = reader.get_tag(feed, tag, None)
|
value: JsonLike | None = cast("JsonLike | None", reader.get_tag(feed, tag, None))
|
||||||
if value is not None and value != "": # noqa: PLC1901
|
if value is not None and value != "": # noqa: PLC1901
|
||||||
feed_data[tag] = value
|
feed_data[tag] = value
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Failed to read tag '%s' for feed '%s' during state export", tag, feed.url)
|
logger.exception("Failed to read tag '%s' for feed '%s' during state export", tag, feed.url)
|
||||||
feeds_state.append(feed_data)
|
feeds_state.append(feed_data)
|
||||||
|
return feeds_state
|
||||||
|
|
||||||
webhooks: list[str | int | float | bool | dict[str, Any] | list[Any] | None] = list(
|
|
||||||
reader.get_tag((), "webhooks", []),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Export global update interval if set
|
def _get_global_dict_tag(reader: Reader, tag_name: str) -> dict[str, JsonLike] | None:
|
||||||
global_update_interval: dict[str, Any] | None = None
|
"""Return a global tag value if it is a dictionary."""
|
||||||
global_update_config = reader.get_tag((), ".reader.update", None)
|
tag_value: JsonLike | None = cast("JsonLike | None", reader.get_tag((), tag_name, None))
|
||||||
if isinstance(global_update_config, dict):
|
return tag_value if isinstance(tag_value, dict) else None
|
||||||
global_update_interval = global_update_config
|
|
||||||
|
|
||||||
global_screenshot_layout: str | None = None
|
|
||||||
screenshot_layout = reader.get_tag((), "screenshot_layout", None)
|
|
||||||
if isinstance(screenshot_layout, str):
|
|
||||||
clean_layout = screenshot_layout.strip().lower()
|
|
||||||
if clean_layout in {"desktop", "mobile"}:
|
|
||||||
global_screenshot_layout = clean_layout
|
|
||||||
|
|
||||||
state: dict = {"feeds": feeds_state, "webhooks": webhooks}
|
def _get_global_screenshot_layout(reader: Reader) -> str | None:
|
||||||
|
"""Return normalized global screenshot layout if valid."""
|
||||||
|
screenshot_layout: JsonLike | None = cast("JsonLike | None", reader.get_tag((), "screenshot_layout", None))
|
||||||
|
if not isinstance(screenshot_layout, str):
|
||||||
|
return None
|
||||||
|
|
||||||
|
clean_layout: str = screenshot_layout.strip().lower()
|
||||||
|
return clean_layout if clean_layout in {"desktop", "mobile"} else None
|
||||||
|
|
||||||
|
|
||||||
|
def export_state(reader: Reader, backup_path: Path) -> None:
|
||||||
|
"""Serialize the current bot state to ``state.json`` inside *backup_path*.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
reader: The :class:`reader.Reader` instance to read state from.
|
||||||
|
backup_path: Destination directory for the exported ``state.json``.
|
||||||
|
"""
|
||||||
|
feeds_state: list[JsonLike] = _build_feed_state(reader)
|
||||||
|
|
||||||
|
webhooks_raw: JsonLike | None = cast("JsonLike | None", reader.get_tag((), "webhooks", []))
|
||||||
|
webhooks: list[JsonLike] = webhooks_raw if isinstance(webhooks_raw, list) else []
|
||||||
|
|
||||||
|
global_update_interval: dict[str, JsonLike] | None = _get_global_dict_tag(reader, ".reader.update")
|
||||||
|
global_screenshot_layout: str | None = _get_global_screenshot_layout(reader)
|
||||||
|
domain_blacklist: dict[str, JsonLike] | None = _get_global_dict_tag(reader, "domain_blacklist")
|
||||||
|
domain_whitelist: dict[str, JsonLike] | None = _get_global_dict_tag(reader, "domain_whitelist")
|
||||||
|
|
||||||
|
state: dict[str, JsonLike] = {"feeds": feeds_state, "webhooks": webhooks}
|
||||||
if global_update_interval is not None:
|
if global_update_interval is not None:
|
||||||
state["global_update_interval"] = global_update_interval
|
state["global_update_interval"] = global_update_interval
|
||||||
if global_screenshot_layout is not None:
|
if global_screenshot_layout is not None:
|
||||||
state["global_screenshot_layout"] = global_screenshot_layout
|
state["global_screenshot_layout"] = global_screenshot_layout
|
||||||
|
if domain_blacklist is not None:
|
||||||
|
state["domain_blacklist"] = domain_blacklist
|
||||||
|
if domain_whitelist is not None:
|
||||||
|
state["domain_whitelist"] = domain_whitelist
|
||||||
state_file: Path = backup_path / "state.json"
|
state_file: Path = backup_path / "state.json"
|
||||||
state_file.write_text(json.dumps(state, indent=2, default=str), encoding="utf-8")
|
state_file.write_text(json.dumps(state, indent=2, default=str), encoding="utf-8")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -54,6 +54,7 @@ from discord_rss_bot.feeds import get_feed_delivery_mode
|
||||||
from discord_rss_bot.feeds import get_screenshot_layout
|
from discord_rss_bot.feeds import get_screenshot_layout
|
||||||
from discord_rss_bot.feeds import send_entry_to_discord
|
from discord_rss_bot.feeds import send_entry_to_discord
|
||||||
from discord_rss_bot.feeds import send_to_discord
|
from discord_rss_bot.feeds import send_to_discord
|
||||||
|
from discord_rss_bot.filter.utils import get_domain_key
|
||||||
from discord_rss_bot.git_backup import commit_state_change
|
from discord_rss_bot.git_backup import commit_state_change
|
||||||
from discord_rss_bot.git_backup import get_backup_path
|
from discord_rss_bot.git_backup import get_backup_path
|
||||||
from discord_rss_bot.is_url_valid import is_url_valid
|
from discord_rss_bot.is_url_valid import is_url_valid
|
||||||
|
|
@ -399,6 +400,7 @@ async def post_unpause_feed(
|
||||||
@app.post("/whitelist")
|
@app.post("/whitelist")
|
||||||
async def post_set_whitelist(
|
async def post_set_whitelist(
|
||||||
reader: Annotated[Reader, Depends(get_reader_dependency)],
|
reader: Annotated[Reader, Depends(get_reader_dependency)],
|
||||||
|
*,
|
||||||
whitelist_title: Annotated[str, Form()] = "",
|
whitelist_title: Annotated[str, Form()] = "",
|
||||||
whitelist_summary: Annotated[str, Form()] = "",
|
whitelist_summary: Annotated[str, Form()] = "",
|
||||||
whitelist_content: Annotated[str, Form()] = "",
|
whitelist_content: Annotated[str, Form()] = "",
|
||||||
|
|
@ -407,6 +409,7 @@ async def post_set_whitelist(
|
||||||
regex_whitelist_summary: Annotated[str, Form()] = "",
|
regex_whitelist_summary: Annotated[str, Form()] = "",
|
||||||
regex_whitelist_content: Annotated[str, Form()] = "",
|
regex_whitelist_content: Annotated[str, Form()] = "",
|
||||||
regex_whitelist_author: Annotated[str, Form()] = "",
|
regex_whitelist_author: Annotated[str, Form()] = "",
|
||||||
|
apply_to_domain: Annotated[bool, Form()] = False,
|
||||||
feed_url: Annotated[str, Form()] = "",
|
feed_url: Annotated[str, Form()] = "",
|
||||||
) -> RedirectResponse:
|
) -> RedirectResponse:
|
||||||
"""Set what the whitelist should be sent, if you have this set only words in the whitelist will be sent.
|
"""Set what the whitelist should be sent, if you have this set only words in the whitelist will be sent.
|
||||||
|
|
@ -420,6 +423,7 @@ async def post_set_whitelist(
|
||||||
regex_whitelist_summary: Whitelisted regex for when checking the summary.
|
regex_whitelist_summary: Whitelisted regex for when checking the summary.
|
||||||
regex_whitelist_content: Whitelisted regex for when checking the content.
|
regex_whitelist_content: Whitelisted regex for when checking the content.
|
||||||
regex_whitelist_author: Whitelisted regex for when checking the author.
|
regex_whitelist_author: Whitelisted regex for when checking the author.
|
||||||
|
apply_to_domain: Also store these values as domain-wide whitelist rules.
|
||||||
feed_url: The feed we should set the whitelist for.
|
feed_url: The feed we should set the whitelist for.
|
||||||
reader: The Reader instance.
|
reader: The Reader instance.
|
||||||
|
|
||||||
|
|
@ -427,16 +431,43 @@ async def post_set_whitelist(
|
||||||
RedirectResponse: Redirect to the feed page.
|
RedirectResponse: Redirect to the feed page.
|
||||||
"""
|
"""
|
||||||
clean_feed_url: str = feed_url.strip() if feed_url else ""
|
clean_feed_url: str = feed_url.strip() if feed_url else ""
|
||||||
reader.set_tag(clean_feed_url, "whitelist_title", whitelist_title) # pyright: ignore[reportArgumentType][call-overload]
|
whitelist_values: dict[str, str] = {
|
||||||
reader.set_tag(clean_feed_url, "whitelist_summary", whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
"whitelist_title": whitelist_title.strip(),
|
||||||
reader.set_tag(clean_feed_url, "whitelist_content", whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
|
"whitelist_summary": whitelist_summary.strip(),
|
||||||
reader.set_tag(clean_feed_url, "whitelist_author", whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
|
"whitelist_content": whitelist_content.strip(),
|
||||||
reader.set_tag(clean_feed_url, "regex_whitelist_title", regex_whitelist_title) # pyright: ignore[reportArgumentType][call-overload]
|
"whitelist_author": whitelist_author.strip(),
|
||||||
reader.set_tag(clean_feed_url, "regex_whitelist_summary", regex_whitelist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
"regex_whitelist_title": regex_whitelist_title.strip(),
|
||||||
reader.set_tag(clean_feed_url, "regex_whitelist_content", regex_whitelist_content) # pyright: ignore[reportArgumentType][call-overload]
|
"regex_whitelist_summary": regex_whitelist_summary.strip(),
|
||||||
reader.set_tag(clean_feed_url, "regex_whitelist_author", regex_whitelist_author) # pyright: ignore[reportArgumentType][call-overload]
|
"regex_whitelist_content": regex_whitelist_content.strip(),
|
||||||
|
"regex_whitelist_author": regex_whitelist_author.strip(),
|
||||||
|
}
|
||||||
|
|
||||||
commit_state_change(reader, f"Update whitelist for {clean_feed_url}")
|
for tag, value in whitelist_values.items():
|
||||||
|
reader.set_tag(clean_feed_url, tag, value) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
|
||||||
|
message: str = f"Update whitelist for {clean_feed_url}"
|
||||||
|
if apply_to_domain:
|
||||||
|
domain_key: str = get_domain_key(clean_feed_url)
|
||||||
|
if domain_key:
|
||||||
|
domain_whitelists_raw = reader.get_tag((), "domain_whitelist", {})
|
||||||
|
domain_whitelists: dict[str, dict[str, str]] = {}
|
||||||
|
if isinstance(domain_whitelists_raw, dict):
|
||||||
|
for existing_domain, existing_values in domain_whitelists_raw.items():
|
||||||
|
if isinstance(existing_domain, str) and isinstance(existing_values, dict):
|
||||||
|
domain_whitelists[existing_domain] = {
|
||||||
|
str(key): str(value) for key, value in existing_values.items() if isinstance(key, str)
|
||||||
|
}
|
||||||
|
|
||||||
|
domain_values: dict[str, str] = {k: v for k, v in whitelist_values.items() if v}
|
||||||
|
if domain_values:
|
||||||
|
domain_whitelists[domain_key] = domain_values
|
||||||
|
else:
|
||||||
|
domain_whitelists.pop(domain_key, None)
|
||||||
|
|
||||||
|
reader.set_tag((), "domain_whitelist", domain_whitelists) # pyright: ignore[reportArgumentType]
|
||||||
|
message = f"Update whitelist for {clean_feed_url} and domain {domain_key}"
|
||||||
|
|
||||||
|
commit_state_change(reader, message)
|
||||||
|
|
||||||
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
|
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
|
||||||
|
|
||||||
|
|
@ -468,6 +499,11 @@ async def get_whitelist(
|
||||||
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", ""))
|
regex_whitelist_summary: str = str(reader.get_tag(feed, "regex_whitelist_summary", ""))
|
||||||
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", ""))
|
regex_whitelist_content: str = str(reader.get_tag(feed, "regex_whitelist_content", ""))
|
||||||
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", ""))
|
regex_whitelist_author: str = str(reader.get_tag(feed, "regex_whitelist_author", ""))
|
||||||
|
domain_key: str = get_domain_key(feed.url)
|
||||||
|
domain_whitelist_raw = reader.get_tag((), "domain_whitelist", {})
|
||||||
|
domain_whitelist_enabled: bool = bool(
|
||||||
|
isinstance(domain_whitelist_raw, dict) and domain_key and domain_key in domain_whitelist_raw,
|
||||||
|
)
|
||||||
|
|
||||||
context = {
|
context = {
|
||||||
"request": request,
|
"request": request,
|
||||||
|
|
@ -480,6 +516,9 @@ async def get_whitelist(
|
||||||
"regex_whitelist_summary": regex_whitelist_summary,
|
"regex_whitelist_summary": regex_whitelist_summary,
|
||||||
"regex_whitelist_content": regex_whitelist_content,
|
"regex_whitelist_content": regex_whitelist_content,
|
||||||
"regex_whitelist_author": regex_whitelist_author,
|
"regex_whitelist_author": regex_whitelist_author,
|
||||||
|
"domain_key": domain_key,
|
||||||
|
"domain_name": extract_domain(feed.url),
|
||||||
|
"domain_whitelist_enabled": domain_whitelist_enabled,
|
||||||
}
|
}
|
||||||
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
|
return templates.TemplateResponse(request=request, name="whitelist.html", context=context)
|
||||||
|
|
||||||
|
|
@ -487,6 +526,7 @@ async def get_whitelist(
|
||||||
@app.post("/blacklist")
|
@app.post("/blacklist")
|
||||||
async def post_set_blacklist(
|
async def post_set_blacklist(
|
||||||
reader: Annotated[Reader, Depends(get_reader_dependency)],
|
reader: Annotated[Reader, Depends(get_reader_dependency)],
|
||||||
|
*,
|
||||||
blacklist_title: Annotated[str, Form()] = "",
|
blacklist_title: Annotated[str, Form()] = "",
|
||||||
blacklist_summary: Annotated[str, Form()] = "",
|
blacklist_summary: Annotated[str, Form()] = "",
|
||||||
blacklist_content: Annotated[str, Form()] = "",
|
blacklist_content: Annotated[str, Form()] = "",
|
||||||
|
|
@ -495,6 +535,7 @@ async def post_set_blacklist(
|
||||||
regex_blacklist_summary: Annotated[str, Form()] = "",
|
regex_blacklist_summary: Annotated[str, Form()] = "",
|
||||||
regex_blacklist_content: Annotated[str, Form()] = "",
|
regex_blacklist_content: Annotated[str, Form()] = "",
|
||||||
regex_blacklist_author: Annotated[str, Form()] = "",
|
regex_blacklist_author: Annotated[str, Form()] = "",
|
||||||
|
apply_to_domain: Annotated[bool, Form()] = False,
|
||||||
feed_url: Annotated[str, Form()] = "",
|
feed_url: Annotated[str, Form()] = "",
|
||||||
) -> RedirectResponse:
|
) -> RedirectResponse:
|
||||||
"""Set the blacklist.
|
"""Set the blacklist.
|
||||||
|
|
@ -511,6 +552,7 @@ async def post_set_blacklist(
|
||||||
regex_blacklist_summary: Blacklisted regex for when checking the summary.
|
regex_blacklist_summary: Blacklisted regex for when checking the summary.
|
||||||
regex_blacklist_content: Blacklisted regex for when checking the content.
|
regex_blacklist_content: Blacklisted regex for when checking the content.
|
||||||
regex_blacklist_author: Blacklisted regex for when checking the author.
|
regex_blacklist_author: Blacklisted regex for when checking the author.
|
||||||
|
apply_to_domain: Also store these values as domain-wide blacklist rules.
|
||||||
feed_url: What feed we should set the blacklist for.
|
feed_url: What feed we should set the blacklist for.
|
||||||
reader: The Reader instance.
|
reader: The Reader instance.
|
||||||
|
|
||||||
|
|
@ -518,15 +560,43 @@ async def post_set_blacklist(
|
||||||
RedirectResponse: Redirect to the feed page.
|
RedirectResponse: Redirect to the feed page.
|
||||||
"""
|
"""
|
||||||
clean_feed_url: str = feed_url.strip() if feed_url else ""
|
clean_feed_url: str = feed_url.strip() if feed_url else ""
|
||||||
reader.set_tag(clean_feed_url, "blacklist_title", blacklist_title) # pyright: ignore[reportArgumentType][call-overload]
|
blacklist_values: dict[str, str] = {
|
||||||
reader.set_tag(clean_feed_url, "blacklist_summary", blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
"blacklist_title": blacklist_title.strip(),
|
||||||
reader.set_tag(clean_feed_url, "blacklist_content", blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
|
"blacklist_summary": blacklist_summary.strip(),
|
||||||
reader.set_tag(clean_feed_url, "blacklist_author", blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
|
"blacklist_content": blacklist_content.strip(),
|
||||||
reader.set_tag(clean_feed_url, "regex_blacklist_title", regex_blacklist_title) # pyright: ignore[reportArgumentType][call-overload]
|
"blacklist_author": blacklist_author.strip(),
|
||||||
reader.set_tag(clean_feed_url, "regex_blacklist_summary", regex_blacklist_summary) # pyright: ignore[reportArgumentType][call-overload]
|
"regex_blacklist_title": regex_blacklist_title.strip(),
|
||||||
reader.set_tag(clean_feed_url, "regex_blacklist_content", regex_blacklist_content) # pyright: ignore[reportArgumentType][call-overload]
|
"regex_blacklist_summary": regex_blacklist_summary.strip(),
|
||||||
reader.set_tag(clean_feed_url, "regex_blacklist_author", regex_blacklist_author) # pyright: ignore[reportArgumentType][call-overload]
|
"regex_blacklist_content": regex_blacklist_content.strip(),
|
||||||
commit_state_change(reader, f"Update blacklist for {clean_feed_url}")
|
"regex_blacklist_author": regex_blacklist_author.strip(),
|
||||||
|
}
|
||||||
|
|
||||||
|
for tag, value in blacklist_values.items():
|
||||||
|
reader.set_tag(clean_feed_url, tag, value) # pyright: ignore[reportArgumentType][call-overload]
|
||||||
|
|
||||||
|
message: str = f"Update blacklist for {clean_feed_url}"
|
||||||
|
if apply_to_domain:
|
||||||
|
domain_key: str = get_domain_key(clean_feed_url)
|
||||||
|
if domain_key:
|
||||||
|
domain_blacklists_raw = reader.get_tag((), "domain_blacklist", {})
|
||||||
|
domain_blacklists: dict[str, dict[str, str]] = {}
|
||||||
|
if isinstance(domain_blacklists_raw, dict):
|
||||||
|
for existing_domain, existing_values in domain_blacklists_raw.items():
|
||||||
|
if isinstance(existing_domain, str) and isinstance(existing_values, dict):
|
||||||
|
domain_blacklists[existing_domain] = {
|
||||||
|
str(key): str(value) for key, value in existing_values.items() if isinstance(key, str)
|
||||||
|
}
|
||||||
|
|
||||||
|
domain_values: dict[str, str] = {k: v for k, v in blacklist_values.items() if v}
|
||||||
|
if domain_values:
|
||||||
|
domain_blacklists[domain_key] = domain_values
|
||||||
|
else:
|
||||||
|
domain_blacklists.pop(domain_key, None)
|
||||||
|
|
||||||
|
reader.set_tag((), "domain_blacklist", domain_blacklists) # pyright: ignore[reportArgumentType]
|
||||||
|
message = f"Update blacklist for {clean_feed_url} and domain {domain_key}"
|
||||||
|
|
||||||
|
commit_state_change(reader, message)
|
||||||
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
|
return RedirectResponse(url=f"/feed?feed_url={urllib.parse.quote(clean_feed_url)}", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -556,6 +626,11 @@ async def get_blacklist(
|
||||||
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", ""))
|
regex_blacklist_summary: str = str(reader.get_tag(feed, "regex_blacklist_summary", ""))
|
||||||
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", ""))
|
regex_blacklist_content: str = str(reader.get_tag(feed, "regex_blacklist_content", ""))
|
||||||
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", ""))
|
regex_blacklist_author: str = str(reader.get_tag(feed, "regex_blacklist_author", ""))
|
||||||
|
domain_key: str = get_domain_key(feed.url)
|
||||||
|
domain_blacklist_raw = reader.get_tag((), "domain_blacklist", {})
|
||||||
|
domain_blacklist_enabled: bool = bool(
|
||||||
|
isinstance(domain_blacklist_raw, dict) and domain_key and domain_key in domain_blacklist_raw,
|
||||||
|
)
|
||||||
|
|
||||||
context = {
|
context = {
|
||||||
"request": request,
|
"request": request,
|
||||||
|
|
@ -568,6 +643,9 @@ async def get_blacklist(
|
||||||
"regex_blacklist_summary": regex_blacklist_summary,
|
"regex_blacklist_summary": regex_blacklist_summary,
|
||||||
"regex_blacklist_content": regex_blacklist_content,
|
"regex_blacklist_content": regex_blacklist_content,
|
||||||
"regex_blacklist_author": regex_blacklist_author,
|
"regex_blacklist_author": regex_blacklist_author,
|
||||||
|
"domain_key": domain_key,
|
||||||
|
"domain_name": extract_domain(feed.url),
|
||||||
|
"domain_blacklist_enabled": domain_blacklist_enabled,
|
||||||
}
|
}
|
||||||
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
|
return templates.TemplateResponse(request=request, name="blacklist.html", context=context)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
{% extends "base.html" %}
|
{% extends "base.html" %}
|
||||||
{% block title %}
|
{% block title %}
|
||||||
| Blacklist
|
| Blacklist
|
||||||
{% endblock title %}
|
{% endblock title %}
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="p-2 border border-dark">
|
<div class="p-2 border border-dark">
|
||||||
<form action="/blacklist" method="post">
|
<form action="/blacklist" method="post">
|
||||||
<!-- Feed URL -->
|
<!-- Feed URL -->
|
||||||
<div class="row pb-2">
|
<div class="row pb-2">
|
||||||
|
|
@ -30,19 +30,33 @@
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
{% if domain_blacklist_enabled %}
|
||||||
|
<div class="alert alert-info py-2" role="alert">Domain-wide blacklist is enabled for {{ domain_key }}.</div>
|
||||||
|
{% endif %}
|
||||||
<label for="blacklist_title" class="col-sm-6 col-form-label">Blacklist - Title</label>
|
<label for="blacklist_title" class="col-sm-6 col-form-label">Blacklist - Title</label>
|
||||||
<input name="blacklist_title" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="blacklist_title"
|
||||||
id="blacklist_title" value="{%- if blacklist_title -%}{{ blacklist_title }}{%- endif -%}" />
|
type="text"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="blacklist_title"
|
||||||
|
value="{%- if blacklist_title -%}{{ blacklist_title }}{%- endif -%}" />
|
||||||
<label for="blacklist_summary" class="col-sm-6 col-form-label">Blacklist - Summary</label>
|
<label for="blacklist_summary" class="col-sm-6 col-form-label">Blacklist - Summary</label>
|
||||||
<input name="blacklist_summary" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="blacklist_summary"
|
||||||
id="blacklist_summary" value="{%- if blacklist_summary -%}{{ blacklist_summary }}{%- endif -%}" />
|
type="text"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="blacklist_summary"
|
||||||
|
value="{%- if blacklist_summary -%}{{ blacklist_summary }}{%- endif -%}" />
|
||||||
<label for="blacklist_content" class="col-sm-6 col-form-label">Blacklist - Content</label>
|
<label for="blacklist_content" class="col-sm-6 col-form-label">Blacklist - Content</label>
|
||||||
<input name="blacklist_content" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="blacklist_content"
|
||||||
id="blacklist_content" value="{%- if blacklist_content -%}{{ blacklist_content }}{%- endif -%}" />
|
type="text"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="blacklist_content"
|
||||||
|
value="{%- if blacklist_content -%}{{ blacklist_content }}{%- endif -%}" />
|
||||||
<label for="blacklist_author" class="col-sm-6 col-form-label">Blacklist - Author</label>
|
<label for="blacklist_author" class="col-sm-6 col-form-label">Blacklist - Author</label>
|
||||||
<input name="blacklist_author" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="blacklist_author"
|
||||||
id="blacklist_author" value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
|
type="text"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="blacklist_author"
|
||||||
|
value="{%- if blacklist_author -%}{{ blacklist_author }}{%- endif -%}" />
|
||||||
<div class="mt-4">
|
<div class="mt-4">
|
||||||
<div class="form-text">
|
<div class="form-text">
|
||||||
<ul class="list-inline">
|
<ul class="list-inline">
|
||||||
|
|
@ -54,7 +68,7 @@
|
||||||
<li>
|
<li>
|
||||||
Examples:
|
Examples:
|
||||||
<code>
|
<code>
|
||||||
<pre>
|
<pre>
|
||||||
^New Release:.*
|
^New Release:.*
|
||||||
\b(update|version|patch)\s+\d+\.\d+
|
\b(update|version|patch)\s+\d+\.\d+
|
||||||
.*\[(important|notice)\].*
|
.*\[(important|notice)\].*
|
||||||
|
|
@ -64,27 +78,42 @@
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
<label for="regex_blacklist_title" class="col-sm-6 col-form-label">Regex Blacklist - Title</label>
|
<label for="regex_blacklist_title" class="col-sm-6 col-form-label">Regex Blacklist - Title</label>
|
||||||
<textarea name="regex_blacklist_title" class="form-control bg-dark border-dark text-muted"
|
<textarea name="regex_blacklist_title"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
id="regex_blacklist_title"
|
id="regex_blacklist_title"
|
||||||
rows="3">{%- if regex_blacklist_title -%}{{ regex_blacklist_title }}{%- endif -%}</textarea>
|
rows="3">{%- if regex_blacklist_title -%}{{ regex_blacklist_title }}{%- endif -%}</textarea>
|
||||||
|
<label for="regex_blacklist_summary" class="col-sm-6 col-form-label">
|
||||||
<label for="regex_blacklist_summary" class="col-sm-6 col-form-label">Regex Blacklist -
|
Regex Blacklist -
|
||||||
Summary</label>
|
Summary
|
||||||
<textarea name="regex_blacklist_summary" class="form-control bg-dark border-dark text-muted"
|
</label>
|
||||||
|
<textarea name="regex_blacklist_summary"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
id="regex_blacklist_summary"
|
id="regex_blacklist_summary"
|
||||||
rows="3">{%- if regex_blacklist_summary -%}{{ regex_blacklist_summary }}{%- endif -%}</textarea>
|
rows="3">{%- if regex_blacklist_summary -%}{{ regex_blacklist_summary }}{%- endif -%}</textarea>
|
||||||
|
<label for="regex_blacklist_content" class="col-sm-6 col-form-label">
|
||||||
<label for="regex_blacklist_content" class="col-sm-6 col-form-label">Regex Blacklist -
|
Regex Blacklist -
|
||||||
Content</label>
|
Content
|
||||||
<textarea name="regex_blacklist_content" class="form-control bg-dark border-dark text-muted"
|
</label>
|
||||||
|
<textarea name="regex_blacklist_content"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
id="regex_blacklist_content"
|
id="regex_blacklist_content"
|
||||||
rows="3">{%- if regex_blacklist_content -%}{{ regex_blacklist_content }}{%- endif -%}</textarea>
|
rows="3">{%- if regex_blacklist_content -%}{{ regex_blacklist_content }}{%- endif -%}</textarea>
|
||||||
|
|
||||||
<label for="regex_blacklist_author" class="col-sm-6 col-form-label">Regex Blacklist - Author</label>
|
<label for="regex_blacklist_author" class="col-sm-6 col-form-label">Regex Blacklist - Author</label>
|
||||||
<textarea name="regex_blacklist_author" class="form-control bg-dark border-dark text-muted"
|
<textarea name="regex_blacklist_author"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
id="regex_blacklist_author"
|
id="regex_blacklist_author"
|
||||||
rows="3">{%- if regex_blacklist_author -%}{{ regex_blacklist_author }}{%- endif -%}</textarea>
|
rows="3">{%- if regex_blacklist_author -%}{{ regex_blacklist_author }}{%- endif -%}</textarea>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="form-check mt-3">
|
||||||
|
<input class="form-check-input"
|
||||||
|
type="checkbox"
|
||||||
|
value="true"
|
||||||
|
id="apply_to_domain"
|
||||||
|
name="apply_to_domain">
|
||||||
|
<label class="form-check-label" for="apply_to_domain">
|
||||||
|
Apply these blacklist values to all feeds on {{ domain_name }} ({{ domain_key }})
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<!-- Add a hidden feed_url field to the form -->
|
<!-- Add a hidden feed_url field to the form -->
|
||||||
|
|
@ -94,5 +123,5 @@
|
||||||
<button class="btn btn-dark btn-sm">Update blacklist</button>
|
<button class="btn btn-dark btn-sm">Update blacklist</button>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
{% endblock content %}
|
{% endblock content %}
|
||||||
|
|
|
||||||
|
|
@ -59,8 +59,16 @@
|
||||||
<!-- Display domains and their feeds -->
|
<!-- Display domains and their feeds -->
|
||||||
{% for domain, domain_feeds in domains.items() %}
|
{% for domain, domain_feeds in domains.items() %}
|
||||||
<div class="card bg-dark border border-dark mb-2">
|
<div class="card bg-dark border border-dark mb-2">
|
||||||
<div class="card-header">
|
<div class="card-header d-flex justify-content-between align-items-center gap-2">
|
||||||
<h3 class="h6 mb-0 text-white-50">{{ domain }} ({{ domain_feeds|length }})</h3>
|
<h3 class="h6 mb-0 text-white-50">{{ domain }} ({{ domain_feeds|length }})</h3>
|
||||||
|
{% if domain_feeds %}
|
||||||
|
<div class="d-flex gap-2">
|
||||||
|
<a class="btn btn-outline-light btn-sm"
|
||||||
|
href="/whitelist?feed_url={{ domain_feeds[0].url|encode_url }}">Domain whitelist</a>
|
||||||
|
<a class="btn btn-outline-light btn-sm"
|
||||||
|
href="/blacklist?feed_url={{ domain_feeds[0].url|encode_url }}">Domain blacklist</a>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
<div class="card-body p-2">
|
<div class="card-body p-2">
|
||||||
<ul class="list-group list-unstyled mb-0">
|
<ul class="list-group list-unstyled mb-0">
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
{% extends "base.html" %}
|
{% extends "base.html" %}
|
||||||
{% block title %}
|
{% block title %}
|
||||||
| Whitelist
|
| Whitelist
|
||||||
{% endblock title %}
|
{% endblock title %}
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<div class="p-2 border border-dark">
|
<div class="p-2 border border-dark">
|
||||||
<form action="/whitelist" method="post">
|
<form action="/whitelist" method="post">
|
||||||
<!-- Feed URL -->
|
<!-- Feed URL -->
|
||||||
<div class="row pb-2">
|
<div class="row pb-2">
|
||||||
|
|
@ -30,19 +30,33 @@
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
{% if domain_whitelist_enabled %}
|
||||||
|
<div class="alert alert-info py-2" role="alert">Domain-wide whitelist is enabled for {{ domain_key }}.</div>
|
||||||
|
{% endif %}
|
||||||
<label for="whitelist_title" class="col-sm-6 col-form-label">Whitelist - Title</label>
|
<label for="whitelist_title" class="col-sm-6 col-form-label">Whitelist - Title</label>
|
||||||
<input name="whitelist_title" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="whitelist_title"
|
||||||
id="whitelist_title" value="{%- if whitelist_title -%}{{ whitelist_title }} {%- endif -%}" />
|
type="text"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="whitelist_title"
|
||||||
|
value="{%- if whitelist_title -%}{{ whitelist_title }} {%- endif -%}" />
|
||||||
<label for="whitelist_summary" class="col-sm-6 col-form-label">Whitelist - Summary</label>
|
<label for="whitelist_summary" class="col-sm-6 col-form-label">Whitelist - Summary</label>
|
||||||
<input name="whitelist_summary" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="whitelist_summary"
|
||||||
id="whitelist_summary" value="{%- if whitelist_summary -%}{{ whitelist_summary }}{%- endif -%}" />
|
type="text"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="whitelist_summary"
|
||||||
|
value="{%- if whitelist_summary -%}{{ whitelist_summary }}{%- endif -%}" />
|
||||||
<label for="whitelist_content" class="col-sm-6 col-form-label">Whitelist - Content</label>
|
<label for="whitelist_content" class="col-sm-6 col-form-label">Whitelist - Content</label>
|
||||||
<input name="whitelist_content" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="whitelist_content"
|
||||||
id="whitelist_content" value="{%- if whitelist_content -%}{{ whitelist_content }}{%- endif -%}" />
|
type="text"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="whitelist_content"
|
||||||
|
value="{%- if whitelist_content -%}{{ whitelist_content }}{%- endif -%}" />
|
||||||
<label for="whitelist_author" class="col-sm-6 col-form-label">Whitelist - Author</label>
|
<label for="whitelist_author" class="col-sm-6 col-form-label">Whitelist - Author</label>
|
||||||
<input name="whitelist_author" type="text" class="form-control bg-dark border-dark text-muted"
|
<input name="whitelist_author"
|
||||||
id="whitelist_author" value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
|
type="text"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
|
id="whitelist_author"
|
||||||
|
value="{%- if whitelist_author -%} {{ whitelist_author }} {%- endif -%}" />
|
||||||
<div class="mt-4">
|
<div class="mt-4">
|
||||||
<div class="form-text">
|
<div class="form-text">
|
||||||
<ul class="list-inline">
|
<ul class="list-inline">
|
||||||
|
|
@ -54,7 +68,7 @@
|
||||||
<li>
|
<li>
|
||||||
Examples:
|
Examples:
|
||||||
<code>
|
<code>
|
||||||
<pre>
|
<pre>
|
||||||
^New Release:.*
|
^New Release:.*
|
||||||
\b(update|version|patch)\s+\d+\.\d+
|
\b(update|version|patch)\s+\d+\.\d+
|
||||||
.*\[(important|notice)\].*
|
.*\[(important|notice)\].*
|
||||||
|
|
@ -64,27 +78,42 @@
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
<label for="regex_whitelist_title" class="col-sm-6 col-form-label">Regex Whitelist - Title</label>
|
<label for="regex_whitelist_title" class="col-sm-6 col-form-label">Regex Whitelist - Title</label>
|
||||||
<textarea name="regex_whitelist_title" class="form-control bg-dark border-dark text-muted"
|
<textarea name="regex_whitelist_title"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
id="regex_whitelist_title"
|
id="regex_whitelist_title"
|
||||||
rows="3">{%- if regex_whitelist_title -%}{{ regex_whitelist_title }}{%- endif -%}</textarea>
|
rows="3">{%- if regex_whitelist_title -%}{{ regex_whitelist_title }}{%- endif -%}</textarea>
|
||||||
|
<label for="regex_whitelist_summary" class="col-sm-6 col-form-label">
|
||||||
<label for="regex_whitelist_summary" class="col-sm-6 col-form-label">Regex Whitelist -
|
Regex Whitelist -
|
||||||
Summary</label>
|
Summary
|
||||||
<textarea name="regex_whitelist_summary" class="form-control bg-dark border-dark text-muted"
|
</label>
|
||||||
|
<textarea name="regex_whitelist_summary"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
id="regex_whitelist_summary"
|
id="regex_whitelist_summary"
|
||||||
rows="3">{%- if regex_whitelist_summary -%}{{ regex_whitelist_summary }}{%- endif -%}</textarea>
|
rows="3">{%- if regex_whitelist_summary -%}{{ regex_whitelist_summary }}{%- endif -%}</textarea>
|
||||||
|
<label for="regex_whitelist_content" class="col-sm-6 col-form-label">
|
||||||
<label for="regex_whitelist_content" class="col-sm-6 col-form-label">Regex Whitelist -
|
Regex Whitelist -
|
||||||
Content</label>
|
Content
|
||||||
<textarea name="regex_whitelist_content" class="form-control bg-dark border-dark text-muted"
|
</label>
|
||||||
|
<textarea name="regex_whitelist_content"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
id="regex_whitelist_content"
|
id="regex_whitelist_content"
|
||||||
rows="3">{%- if regex_whitelist_content -%}{{ regex_whitelist_content }}{%- endif -%}</textarea>
|
rows="3">{%- if regex_whitelist_content -%}{{ regex_whitelist_content }}{%- endif -%}</textarea>
|
||||||
|
|
||||||
<label for="regex_whitelist_author" class="col-sm-6 col-form-label">Regex Whitelist - Author</label>
|
<label for="regex_whitelist_author" class="col-sm-6 col-form-label">Regex Whitelist - Author</label>
|
||||||
<textarea name="regex_whitelist_author" class="form-control bg-dark border-dark text-muted"
|
<textarea name="regex_whitelist_author"
|
||||||
|
class="form-control bg-dark border-dark text-muted"
|
||||||
id="regex_whitelist_author"
|
id="regex_whitelist_author"
|
||||||
rows="3">{%- if regex_whitelist_author -%}{{ regex_whitelist_author }}{%- endif -%}</textarea>
|
rows="3">{%- if regex_whitelist_author -%}{{ regex_whitelist_author }}{%- endif -%}</textarea>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="form-check mt-3">
|
||||||
|
<input class="form-check-input"
|
||||||
|
type="checkbox"
|
||||||
|
value="true"
|
||||||
|
id="apply_to_domain"
|
||||||
|
name="apply_to_domain">
|
||||||
|
<label class="form-check-label" for="apply_to_domain">
|
||||||
|
Apply these whitelist values to all feeds on {{ domain_name }} ({{ domain_key }})
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<!-- Add a hidden feed_url field to the form -->
|
<!-- Add a hidden feed_url field to the form -->
|
||||||
|
|
@ -94,5 +123,5 @@
|
||||||
<button class="btn btn-dark btn-sm">Update whitelist</button>
|
<button class="btn btn-dark btn-sm">Update whitelist</button>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
{% endblock content %}
|
{% endblock content %}
|
||||||
|
|
|
||||||
|
|
@ -203,3 +203,33 @@ def test_regex_should_be_skipped() -> None:
|
||||||
)
|
)
|
||||||
reader.delete_tag(feed, "regex_blacklist_author")
|
reader.delete_tag(feed, "regex_blacklist_author")
|
||||||
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_blacklist_should_be_skipped() -> None:
    """Domain-wide blacklist should apply to feeds on the same domain.

    The feed starts without any blacklist tags, so its first entry must not be
    skipped. After a global ``domain_blacklist`` tag is stored, the same feed
    must report blacklist tags and the same entry must be skipped.
    """
    reader: Reader = get_reader()

    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    first_entry: Entry | None = next(iter(entries), None)
    assert first_entry is not None, "Expected at least one entry"

    # Baseline: nothing is configured yet, so nothing may be filtered.
    assert feed_has_blacklist_tags(reader, feed) is False, "Feed should not have blacklist tags"
    assert entry_should_be_skipped(reader, first_entry) is False, "Entry should not be skipped"

    # The rules are stored as a global tag (resource ``()``), keyed by domain.
    # NOTE(review): presumably "lovinator.space" is the domain key derived from
    # ``feed_url`` - confirm against the module-level fixture.
    reader.set_tag(
        (),
        "domain_blacklist",
        {
            "lovinator.space": {
                "blacklist_author": "TheLovinator",
                # Raw string with a single backslash so ``\w+`` is the regex
                # word-character class; ``\\w+`` would match a literal backslash.
                "regex_blacklist_title": r"fvnnn\w+",
            },
        },
    )  # pyright: ignore[reportArgumentType]

    assert feed_has_blacklist_tags(reader, feed) is True, "Domain blacklist should count as blacklist tags"
    assert entry_should_be_skipped(reader, first_entry) is True, "Entry should be skipped by domain blacklist"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ from __future__ import annotations
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from datetime import UTC
|
||||||
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import LiteralString
|
from typing import LiteralString
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
@ -190,6 +192,67 @@ def test_get_entry_delivery_mode_falls_back_to_legacy_embed_flag() -> None:
|
||||||
assert result == "text"
|
assert result == "text"
|
||||||
|
|
||||||
|
|
||||||
|
@patch("discord_rss_bot.feeds.execute_webhook")
@patch("discord_rss_bot.feeds.create_text_webhook")
@patch("discord_rss_bot.feeds.should_be_sent", return_value=True)
@patch("discord_rss_bot.feeds.has_white_tags", return_value=True)
@patch("discord_rss_bot.feeds.entry_should_be_skipped", return_value=True)
def test_send_to_discord_whitelist_precedence_over_blacklist(
    mock_entry_should_be_skipped: MagicMock,
    mock_has_white_tags: MagicMock,
    mock_should_be_sent: MagicMock,
    mock_create_text_webhook: MagicMock,
    mock_execute_webhook: MagicMock,
) -> None:
    """A whitelisted entry is delivered even though the blacklist check would also match.

    All three filter helpers are patched to return ``True``. The test then
    verifies that ``send_to_discord`` consults only the whitelist helpers,
    never calls ``entry_should_be_skipped``, and executes the webhook once.
    """
    # Values the fake ``reader.get_tag`` hands out; every other key gets the caller's default.
    tag_values: dict[str, str] = {
        "webhook": "https://discord.test/webhook",
        "delivery_mode": "text",
    }

    def get_tag_side_effect(
        resource: str | Feed,
        key: str,
        default: str | None = None,
    ) -> str | None:
        """Stand in for ``reader.get_tag``.

        Args:
            resource: The resource the tag is requested for (ignored).
            key: The tag key being requested.
            default: Value returned when ``key`` has no canned value.

        Returns:
            The canned value for ``"webhook"`` or ``"delivery_mode"``, otherwise ``default``.
        """
        return tag_values.get(key, default)

    feed = MagicMock()
    feed.url = "https://example.com/feed.xml"

    entry = MagicMock()
    entry.id = "entry-1"
    entry.feed = feed
    entry.feed_url = feed.url
    entry.added = datetime.now(tz=UTC)

    reader = MagicMock()
    reader.get_entries.return_value = [entry]
    reader.get_tag.side_effect = get_tag_side_effect

    webhook = MagicMock()
    mock_create_text_webhook.return_value = webhook

    send_to_discord(reader=reader, feed=feed, do_once=True)

    # Whitelist path was taken ...
    mock_has_white_tags.assert_called_once_with(reader, feed)
    mock_should_be_sent.assert_called_once_with(reader, entry)
    # ... so the blacklist helper must not have been consulted at all.
    mock_entry_should_be_skipped.assert_not_called()
    mock_execute_webhook.assert_called_once_with(webhook, entry, reader=reader)
|
||||||
|
|
||||||
|
|
||||||
@patch("discord_rss_bot.feeds.execute_webhook")
|
@patch("discord_rss_bot.feeds.execute_webhook")
|
||||||
@patch("discord_rss_bot.feeds.create_text_webhook")
|
@patch("discord_rss_bot.feeds.create_text_webhook")
|
||||||
@patch("discord_rss_bot.feeds.create_hoyolab_webhook")
|
@patch("discord_rss_bot.feeds.create_hoyolab_webhook")
|
||||||
|
|
|
||||||
|
|
@ -173,9 +173,11 @@ def test_export_state_creates_state_json(tmp_path: Path) -> None:
|
||||||
tag: str | None = None,
|
tag: str | None = None,
|
||||||
default: str | None = None,
|
default: str | None = None,
|
||||||
) -> list[Any] | str | None:
|
) -> list[Any] | str | None:
|
||||||
if feed_or_key == () and tag is None:
|
if feed_or_key == () and tag == "domain_blacklist":
|
||||||
# Called for global webhooks list
|
return {"example.com": {"blacklist_title": "spoiler"}}
|
||||||
return []
|
|
||||||
|
if feed_or_key == () and tag == "domain_whitelist":
|
||||||
|
return {"example.com": {"whitelist_title": "release"}}
|
||||||
|
|
||||||
if tag == "webhook":
|
if tag == "webhook":
|
||||||
return "https://discord.com/api/webhooks/123/abc"
|
return "https://discord.com/api/webhooks/123/abc"
|
||||||
|
|
@ -194,6 +196,8 @@ def test_export_state_creates_state_json(tmp_path: Path) -> None:
|
||||||
data: dict[str, Any] = json.loads(state_file.read_text(encoding="utf-8"))
|
data: dict[str, Any] = json.loads(state_file.read_text(encoding="utf-8"))
|
||||||
assert "feeds" in data
|
assert "feeds" in data
|
||||||
assert "webhooks" in data
|
assert "webhooks" in data
|
||||||
|
assert data["domain_blacklist"]["example.com"]["blacklist_title"] == "spoiler"
|
||||||
|
assert data["domain_whitelist"]["example.com"]["whitelist_title"] == "release"
|
||||||
assert data["feeds"][0]["url"] == "https://example.com/feed.rss"
|
assert data["feeds"][0]["url"] == "https://example.com/feed.rss"
|
||||||
assert data["feeds"][0]["webhook"] == "https://discord.com/api/webhooks/123/abc"
|
assert data["feeds"][0]["webhook"] == "https://discord.com/api/webhooks/123/abc"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -221,6 +221,264 @@ def test_get() -> None:
|
||||||
assert response.status_code == 200, f"/whitelist failed: {response.text}"
|
assert response.status_code == 200, f"/whitelist failed: {response.text}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_blacklist_apply_to_domain_updates_global_domain_blacklist() -> None:
    """POST /blacklist with ``apply_to_domain`` stores the values in the global ``domain_blacklist`` tag."""
    reader: Reader = get_reader_dependency()

    # Start from a known state: drop and re-create the webhook, then the feed.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_payload = {"webhook_name": webhook_name, "webhook_url": webhook_url}
    webhook_response: Response = client.post(url="/add_webhook", data=webhook_payload)
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_payload = {"feed_url": feed_url, "webhook_dropdown": webhook_name}
    feed_response: Response = client.post(url="/add", data=feed_payload)
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    blacklist_payload = {
        "feed_url": feed_url,
        "blacklist_author": "TheLovinator",
        "apply_to_domain": "true",
    }
    blacklist_response: Response = client.post(url="/blacklist", data=blacklist_payload)
    assert blacklist_response.status_code == 200, f"Failed to post blacklist: {blacklist_response.text}"

    # The submitted value must now be readable from the global tag, keyed by domain.
    domain_blacklist = reader.get_tag((), "domain_blacklist", {})
    assert isinstance(domain_blacklist, dict), "domain_blacklist should be a dict"
    assert "lovinator.space" in domain_blacklist, "Expected domain key in domain_blacklist"
    assert domain_blacklist["lovinator.space"]["blacklist_author"] == "TheLovinator"
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_whitelist_apply_to_domain_updates_global_domain_whitelist() -> None:
    """POST /whitelist with ``apply_to_domain`` stores the values in the global ``domain_whitelist`` tag."""
    reader: Reader = get_reader_dependency()

    # Start from a known state: drop and re-create the webhook, then the feed.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_payload = {"webhook_name": webhook_name, "webhook_url": webhook_url}
    webhook_response: Response = client.post(url="/add_webhook", data=webhook_payload)
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_payload = {"feed_url": feed_url, "webhook_dropdown": webhook_name}
    feed_response: Response = client.post(url="/add", data=feed_payload)
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    whitelist_payload = {
        "feed_url": feed_url,
        "whitelist_author": "TheLovinator",
        "apply_to_domain": "true",
    }
    whitelist_response: Response = client.post(url="/whitelist", data=whitelist_payload)
    assert whitelist_response.status_code == 200, f"Failed to post whitelist: {whitelist_response.text}"

    # The submitted value must now be readable from the global tag, keyed by domain.
    domain_whitelist = reader.get_tag((), "domain_whitelist", {})
    assert isinstance(domain_whitelist, dict), "domain_whitelist should be a dict"
    assert "lovinator.space" in domain_whitelist, "Expected domain key in domain_whitelist"
    assert domain_whitelist["lovinator.space"]["whitelist_author"] == "TheLovinator"
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_filter_pages_show_domain_enabled_notice() -> None:
    """The /blacklist and /whitelist pages render a notice when a domain-wide filter is set."""
    reader: Reader = get_reader_dependency()

    # Start from a known state: drop and re-create the webhook, then the feed.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_payload = {"webhook_name": webhook_name, "webhook_url": webhook_url}
    webhook_response: Response = client.post(url="/add_webhook", data=webhook_payload)
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_payload = {"feed_url": feed_url, "webhook_dropdown": webhook_name}
    feed_response: Response = client.post(url="/add", data=feed_payload)
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    # Configure both global domain filters directly on the reader (blacklist first).
    domain_filters = {
        "domain_blacklist": {"lovinator.space": {"blacklist_title": "spoiler"}},
        "domain_whitelist": {"lovinator.space": {"whitelist_title": "release"}},
    }
    for tag_name, tag_value in domain_filters.items():
        reader.set_tag((), tag_name, tag_value)  # pyright: ignore[reportArgumentType]

    # Each page must load and contain its own notice text.
    expected_notices = (
        ("/blacklist", "Domain-wide blacklist is enabled for lovinator.space."),
        ("/whitelist", "Domain-wide whitelist is enabled for lovinator.space."),
    )
    for page, notice in expected_notices:
        page_response: Response = client.get(url=page, params={"feed_url": encoded_feed_url(feed_url)})
        assert page_response.status_code == 200, f"{page} failed: {page_response.text}"
        assert notice in page_response.text
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_blacklist_isolation_between_domains() -> None:
    """Applying domain blacklist should not overwrite other domains.

    Seeds the global ``domain_blacklist`` tag with an ``example.com`` entry,
    posts a domain-wide blacklist for ``feed_url``, and checks that both the
    existing ``example.com`` entry and the new ``lovinator.space`` entry are
    present afterwards.
    """
    reader: Reader = get_reader_dependency()

    # Re-create the webhook and feed so this test does not rely on an earlier
    # test having added them (same setup as the whitelist isolation test).
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert response.status_code == 200, f"Failed to add webhook: {response.text}"
    client.post(url="/remove", data={"feed_url": feed_url})
    response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name})
    assert response.status_code == 200, f"Failed to add feed: {response.text}"

    # Existing entry for an unrelated domain that must survive the POST below.
    reader.set_tag((), "domain_blacklist", {"example.com": {"blacklist_title": "existing"}})  # pyright: ignore[reportArgumentType]

    response = client.post(
        url="/blacklist",
        data={
            "feed_url": feed_url,
            "blacklist_author": "TheLovinator",
            "apply_to_domain": "true",
        },
    )
    assert response.status_code == 200, f"Failed to post blacklist: {response.text}"

    domain_blacklist = reader.get_tag((), "domain_blacklist", {})
    assert isinstance(domain_blacklist, dict)
    assert domain_blacklist["example.com"]["blacklist_title"] == "existing"
    assert domain_blacklist["lovinator.space"]["blacklist_author"] == "TheLovinator"
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_whitelist_isolation_between_domains() -> None:
    """Saving a domain whitelist for one domain leaves entries for other domains intact."""
    reader: Reader = get_reader_dependency()

    # Start from a known state: drop and re-create the webhook, then the feed.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_payload = {"webhook_name": webhook_name, "webhook_url": webhook_url}
    webhook_response: Response = client.post(url="/add_webhook", data=webhook_payload)
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_payload = {"feed_url": feed_url, "webhook_dropdown": webhook_name}
    feed_response: Response = client.post(url="/add", data=feed_payload)
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    # Existing entry for an unrelated domain that must survive the POST below.
    existing_whitelist = {"example.com": {"whitelist_title": "existing"}}
    reader.set_tag((), "domain_whitelist", existing_whitelist)  # pyright: ignore[reportArgumentType]

    whitelist_payload = {
        "feed_url": feed_url,
        "whitelist_author": "TheLovinator",
        "apply_to_domain": "true",
    }
    whitelist_response: Response = client.post(url="/whitelist", data=whitelist_payload)
    assert whitelist_response.status_code == 200, f"Failed to post whitelist: {whitelist_response.text}"

    domain_whitelist = reader.get_tag((), "domain_whitelist", {})
    assert isinstance(domain_whitelist, dict)
    assert domain_whitelist["example.com"]["whitelist_title"] == "existing"
    assert domain_whitelist["lovinator.space"]["whitelist_author"] == "TheLovinator"
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_blacklist_removed_when_apply_to_domain_and_empty_values() -> None:
    """A domain-wide blacklist POST that carries no filter values drops the domain's entry."""
    reader: Reader = get_reader_dependency()

    # Start from a known state: drop and re-create the webhook, then the feed.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_payload = {"webhook_name": webhook_name, "webhook_url": webhook_url}
    webhook_response: Response = client.post(url="/add_webhook", data=webhook_payload)
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_payload = {"feed_url": feed_url, "webhook_dropdown": webhook_name}
    feed_response: Response = client.post(url="/add", data=feed_payload)
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    # Pre-existing entry for the feed's domain that the empty submission should remove.
    existing_blacklist = {"lovinator.space": {"blacklist_title": "existing"}}
    reader.set_tag((), "domain_blacklist", existing_blacklist)  # pyright: ignore[reportArgumentType]

    # Only the feed and the domain flag are sent: no blacklist fields at all.
    empty_payload = {"feed_url": feed_url, "apply_to_domain": "true"}
    blacklist_response: Response = client.post(url="/blacklist", data=empty_payload)
    assert blacklist_response.status_code == 200, f"Failed to post blacklist: {blacklist_response.text}"

    domain_blacklist = reader.get_tag((), "domain_blacklist", {})
    assert isinstance(domain_blacklist, dict)
    assert "lovinator.space" not in domain_blacklist
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_whitelist_removed_when_apply_to_domain_and_empty_values() -> None:
    """Submitting empty domain whitelist values should remove existing domain entry.

    Seeds the global ``domain_whitelist`` tag with a ``lovinator.space`` entry,
    posts to ``/whitelist`` with ``apply_to_domain`` but no whitelist fields,
    and checks that the ``lovinator.space`` key is gone afterwards.
    """
    reader: Reader = get_reader_dependency()

    # Re-create the webhook and feed so this test does not rely on an earlier
    # test having added them (same setup as the blacklist removal test).
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    response: Response = client.post(
        url="/add_webhook",
        data={"webhook_name": webhook_name, "webhook_url": webhook_url},
    )
    assert response.status_code == 200, f"Failed to add webhook: {response.text}"
    client.post(url="/remove", data={"feed_url": feed_url})
    response = client.post(url="/add", data={"feed_url": feed_url, "webhook_dropdown": webhook_name})
    assert response.status_code == 200, f"Failed to add feed: {response.text}"

    # Pre-existing entry for the feed's domain that the empty submission should remove.
    reader.set_tag((), "domain_whitelist", {"lovinator.space": {"whitelist_title": "existing"}})  # pyright: ignore[reportArgumentType]

    response = client.post(
        url="/whitelist",
        data={"feed_url": feed_url, "apply_to_domain": "true"},
    )
    assert response.status_code == 200, f"Failed to post whitelist: {response.text}"

    domain_whitelist = reader.get_tag((), "domain_whitelist", {})
    assert isinstance(domain_whitelist, dict)
    assert "lovinator.space" not in domain_whitelist
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_to_domain_missing_does_not_update_domain_tags() -> None:
    """Posting filters without ``apply_to_domain`` leaves both global domain tags untouched."""
    reader: Reader = get_reader_dependency()

    # Start from a known state: drop and re-create the webhook, then the feed.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_payload = {"webhook_name": webhook_name, "webhook_url": webhook_url}
    webhook_response: Response = client.post(url="/add_webhook", data=webhook_payload)
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_payload = {"feed_url": feed_url, "webhook_dropdown": webhook_name}
    feed_response: Response = client.post(url="/add", data=feed_payload)
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    # Both domain tags start out empty so any write by the endpoints would be visible.
    reader.set_tag((), "domain_blacklist", {})  # pyright: ignore[reportArgumentType]
    reader.set_tag((), "domain_whitelist", {})  # pyright: ignore[reportArgumentType]

    # Neither payload carries the apply_to_domain field.
    blacklist_payload = {"feed_url": feed_url, "blacklist_author": "TheLovinator"}
    blacklist_response: Response = client.post(url="/blacklist", data=blacklist_payload)
    assert blacklist_response.status_code == 200, f"Failed to post blacklist: {blacklist_response.text}"

    whitelist_payload = {"feed_url": feed_url, "whitelist_author": "TheLovinator"}
    whitelist_response: Response = client.post(url="/whitelist", data=whitelist_payload)
    assert whitelist_response.status_code == 200, f"Failed to post whitelist: {whitelist_response.text}"

    assert reader.get_tag((), "domain_blacklist", {}) == {}
    assert reader.get_tag((), "domain_whitelist", {}) == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_to_domain_invalid_value_rejected() -> None:
    """A non-boolean ``apply_to_domain`` value makes POST /blacklist answer with HTTP 422."""
    # "invalid-bool" is the value under test; the other fields are ordinary.
    bad_payload = {
        "feed_url": feed_url,
        "blacklist_author": "TheLovinator",
        "apply_to_domain": "invalid-bool",
    }
    rejected = client.post(url="/blacklist", data=bad_payload)
    assert rejected.status_code == 422, f"Expected 422 for invalid boolean: {rejected.text}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_index_shows_domain_filter_shortcuts() -> None:
    """The index page contains the domain whitelist/blacklist labels and links for the feed."""
    # Start from a known state: drop and re-create the webhook, then the feed.
    client.post(url="/delete_webhook", data={"webhook_url": webhook_url})
    webhook_payload = {"webhook_name": webhook_name, "webhook_url": webhook_url}
    webhook_response: Response = client.post(url="/add_webhook", data=webhook_payload)
    assert webhook_response.status_code == 200, f"Failed to add webhook: {webhook_response.text}"

    client.post(url="/remove", data={"feed_url": feed_url})
    feed_payload = {"feed_url": feed_url, "webhook_dropdown": webhook_name}
    feed_response: Response = client.post(url="/add", data=feed_payload)
    assert feed_response.status_code == 200, f"Failed to add feed: {feed_response.text}"

    index_response: Response = client.get(url="/")
    assert index_response.status_code == 200, f"Failed to get /: {index_response.text}"

    # Button labels first, then the links that point at the per-feed filter pages.
    for label in ("Domain whitelist", "Domain blacklist"):
        assert label in index_response.text
    for page in ("/whitelist", "/blacklist"):
        assert f"{page}?feed_url={encoded_feed_url(feed_url)}" in index_response.text
|
||||||
|
|
||||||
|
|
||||||
def test_settings_page_shows_screenshot_layout_setting() -> None:
|
def test_settings_page_shows_screenshot_layout_setting() -> None:
|
||||||
response: Response = client.get(url="/settings")
|
response: Response = client.get(url="/settings")
|
||||||
assert response.status_code == 200, f"/settings failed: {response.text}"
|
assert response.status_code == 200, f"/settings failed: {response.text}"
|
||||||
|
|
|
||||||
|
|
@ -184,3 +184,33 @@ def test_regex_should_be_sent() -> None:
|
||||||
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns"
|
assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns"
|
||||||
reader.delete_tag(feed, "regex_whitelist_author")
|
reader.delete_tag(feed, "regex_whitelist_author")
|
||||||
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
|
||||||
|
|
||||||
|
|
||||||
|
def test_domain_whitelist_should_be_sent() -> None:
    """Domain-wide whitelist should apply to feeds on the same domain.

    Adds ``feed_url`` to the reader returned by ``get_reader()``, checks that
    nothing is whitelisted while no tags are set, then stores a global
    ``domain_whitelist`` tag keyed by ``lovinator.space`` and checks that both
    ``has_white_tags`` and ``should_be_sent`` report it.
    """
    reader: Reader = get_reader()

    # Register the feed and fetch its entries so there is something to filter.
    reader.add_feed(feed_url)
    feed: Feed = reader.get_feed(feed_url)
    reader.update_feeds()

    entries: Iterable[Entry] = reader.get_entries(feed=feed)
    first_entry: Entry | None = next(iter(entries), None)
    assert first_entry is not None, "Expected at least one entry"

    # Baseline: without any whitelist tags the entry is not reported as whitelisted.
    assert has_white_tags(reader, feed) is False, "Feed should not have whitelist tags"
    assert should_be_sent(reader, first_entry) is False, "Entry should not be sent"

    # The tag is stored globally (resource ``()``) and keyed by domain.
    reader.set_tag(
        (),
        "domain_whitelist",
        {
            "lovinator.space": {
                "whitelist_author": "TheLovinator",
                # Single backslash: in a raw string ``\\w`` would require a literal
                # backslash in the title instead of matching word characters.
                "regex_whitelist_title": r"fvnnn\w+",
            },
        },
    )  # pyright: ignore[reportArgumentType]

    assert has_white_tags(reader, feed) is True, "Domain whitelist should count as whitelist tags"
    assert should_be_sent(reader, first_entry) is True, "Entry should be sent by domain whitelist"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue