diff --git a/pyproject.toml b/pyproject.toml
index 4cda1f6..21ab35a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,7 @@ platformdirs = "*"
python-dotenv = "*"
python-multipart = "*"
reader = "*"
-sentry-sdk = {version = "*", extras = ["fastapi"]}
+sentry-sdk = { version = "*", extras = ["fastapi"] }
uvicorn = "*"
[tool.poetry.group.dev.dependencies]
@@ -86,6 +86,8 @@ lint.ignore = [
"PLR6301", # Checks for the presence of unused self parameter in methods definitions.
"RUF029", # Checks for functions declared async that do not await or otherwise use features requiring the function to be declared async.
"TD003", # Checks that a TODO comment is associated with a link to a relevant issue or ticket.
+ "PLR0913", # Checks for function definitions that include too many arguments.
+ "PLR0917", # Checks for function definitions that include too many positional arguments.
# Conflicting lint rules when using Ruff's formatter
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
diff --git a/tests/test_blacklist.py b/tests/test_blacklist.py
index 4f5a317..d2a785b 100644
--- a/tests/test_blacklist.py
+++ b/tests/test_blacklist.py
@@ -39,6 +39,13 @@ def test_has_black_tags() -> None:
check_if_has_tag(reader, feed, "blacklist_title")
check_if_has_tag(reader, feed, "blacklist_summary")
check_if_has_tag(reader, feed, "blacklist_content")
+ check_if_has_tag(reader, feed, "blacklist_author")
+
+ # Test regex blacklist tags
+ check_if_has_tag(reader, feed, "regex_blacklist_title")
+ check_if_has_tag(reader, feed, "regex_blacklist_summary")
+ check_if_has_tag(reader, feed, "regex_blacklist_content")
+ check_if_has_tag(reader, feed, "regex_blacklist_author")
# Clean up
reader.delete_feed(feed_url)
@@ -74,6 +81,7 @@ def test_should_be_skipped() -> None:
# Test entry without any blacklists
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+ # Test standard blacklist functionality
reader.set_tag(feed, "blacklist_title", "fvnnnfnfdnfdnfd") # pyright: ignore[reportArgumentType]
assert entry_should_be_skipped(reader, first_entry[0]) is True, f"Entry should be skipped: {first_entry[0]}"
reader.delete_tag(feed, "blacklist_title")
@@ -113,3 +121,81 @@ def test_should_be_skipped() -> None:
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
reader.delete_tag(feed, "blacklist_author")
assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+
+def test_regex_should_be_skipped() -> None:
+ """Test the regex filtering functionality for blacklist."""
+ reader: Reader = get_reader()
+
+ # Add feed and update entries
+ reader.add_feed(feed_url)
+ feed: Feed = reader.get_feed(feed_url)
+ reader.update_feeds()
+
+ # Get first entry
+ first_entry: list[Entry] = []
+ entries: Iterable[Entry] = reader.get_entries(feed=feed)
+ assert entries is not None, f"Entries should not be None: {entries}"
+ for entry in entries:
+ first_entry.append(entry)
+ break
+ assert len(first_entry) == 1, f"First entry should be added: {first_entry}"
+
+ # Test entry without any regex blacklists
+ assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+ # Test regex blacklist for title
+ reader.set_tag(feed, "regex_blacklist_title", r"fvnnn\w+") # pyright: ignore[reportArgumentType]
+ assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+ f"Entry should be skipped with regex title match: {first_entry[0]}"
+ )
+ reader.delete_tag(feed, "regex_blacklist_title")
+ assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+ # Test regex blacklist for summary
+ reader.set_tag(feed, "regex_blacklist_summary", r"ffdnfdn\w+") # pyright: ignore[reportArgumentType]
+ assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+ f"Entry should be skipped with regex summary match: {first_entry[0]}"
+ )
+ reader.delete_tag(feed, "regex_blacklist_summary")
+ assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+ # Test regex blacklist for content
+ reader.set_tag(feed, "regex_blacklist_content", r"ffdnfdnfdn\w+") # pyright: ignore[reportArgumentType]
+ assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+ f"Entry should be skipped with regex content match: {first_entry[0]}"
+ )
+ reader.delete_tag(feed, "regex_blacklist_content")
+ assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+ # Test regex blacklist for author
+ reader.set_tag(feed, "regex_blacklist_author", r"TheLovinator\d*") # pyright: ignore[reportArgumentType]
+ assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+ f"Entry should be skipped with regex author match: {first_entry[0]}"
+ )
+ reader.delete_tag(feed, "regex_blacklist_author")
+ assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+ # Test invalid regex pattern (should not raise an exception)
+ reader.set_tag(feed, "regex_blacklist_title", r"[incomplete") # pyright: ignore[reportArgumentType]
+ assert entry_should_be_skipped(reader, first_entry[0]) is False, (
+ f"Entry should not be skipped with invalid regex: {first_entry[0]}"
+ )
+ reader.delete_tag(feed, "regex_blacklist_title")
+
+ # Test multiple regex patterns separated by commas
+ reader.set_tag(feed, "regex_blacklist_author", r"pattern1,TheLovinator\d*,pattern3") # pyright: ignore[reportArgumentType]
+ assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+ f"Entry should be skipped with one matching pattern in list: {first_entry[0]}"
+ )
+ reader.delete_tag(feed, "regex_blacklist_author")
+ assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
+
+ # Test newline-separated regex patterns
+ newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
+ reader.set_tag(feed, "regex_blacklist_author", newline_patterns) # pyright: ignore[reportArgumentType]
+ assert entry_should_be_skipped(reader, first_entry[0]) is True, (
+ f"Entry should be skipped with newline-separated patterns: {first_entry[0]}"
+ )
+ reader.delete_tag(feed, "regex_blacklist_author")
+ assert entry_should_be_skipped(reader, first_entry[0]) is False, f"Entry should not be skipped: {first_entry[0]}"
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 0bccb6b..5274eb8 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,6 @@
from __future__ import annotations
-from discord_rss_bot.filter.utils import is_word_in_text
+from discord_rss_bot.filter.utils import is_regex_match, is_word_in_text
def test_is_word_in_text() -> None:
@@ -14,3 +14,51 @@ def test_is_word_in_text() -> None:
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
assert is_word_in_text("Alert,Forma", "Outbreak - Mutagen Mass - Rhea (Saturn)") is False, msg_false
assert is_word_in_text("word1,word2", "This is a sample text containing none of the words.") is False, msg_false
+
+
+def test_is_regex_match() -> None:
+ msg_true = "Should return True"
+ msg_false = "Should return False"
+
+ # Test basic regex patterns
+ assert is_regex_match(r"word\d+", "This text contains word123") is True, msg_true
+ assert is_regex_match(r"^Hello", "Hello world") is True, msg_true
+ assert is_regex_match(r"world$", "Hello world") is True, msg_true
+
+ # Test case insensitivity
+ assert is_regex_match(r"hello", "This text contains HELLO") is True, msg_true
+
+ # Test comma-separated patterns
+ assert is_regex_match(r"pattern1,pattern2", "This contains pattern2") is True, msg_true
+ assert is_regex_match(r"pattern1, pattern2", "This contains pattern1") is True, msg_true
+
+ # Test regex that shouldn't match
+ assert is_regex_match(r"^start", "This doesn't start with the pattern") is False, msg_false
+ assert is_regex_match(r"end$", "This doesn't end with the pattern") is False, msg_false
+
+ # Test with empty input
+ assert is_regex_match("", "Some text") is False, msg_false
+ assert is_regex_match("pattern", "") is False, msg_false
+
+ # Test with invalid regex (should not raise an exception and return False)
+ assert is_regex_match(r"[incomplete", "Some text") is False, msg_false
+
+ # Test with multiple patterns where one is invalid
+ assert is_regex_match(r"valid, [invalid, \w+", "Contains word") is True, msg_true
+
+ # Test newline-separated patterns
+ newline_patterns = "pattern1\n^start\ncontains\\d+"
+ assert is_regex_match(newline_patterns, "This contains123 text") is True, msg_true
+ assert is_regex_match(newline_patterns, "start of line") is True, msg_true
+ assert is_regex_match(newline_patterns, "pattern1 is here") is True, msg_true
+ assert is_regex_match(newline_patterns, "None of these match") is False, msg_false
+
+ # Test mixed newline and comma patterns (for backward compatibility)
+ mixed_patterns = "pattern1\npattern2,pattern3\npattern4"
+ assert is_regex_match(mixed_patterns, "Contains pattern3") is True, msg_true
+ assert is_regex_match(mixed_patterns, "Contains pattern4") is True, msg_true
+
+ # Test with empty lines and spaces
+ whitespace_patterns = "\\s+\n \n\npattern\n\n"
+ assert is_regex_match(whitespace_patterns, "text with spaces") is True, msg_true
+ assert is_regex_match(whitespace_patterns, "text with pattern") is True, msg_true
diff --git a/tests/test_whitelist.py b/tests/test_whitelist.py
index cf39aa0..9fbb712 100644
--- a/tests/test_whitelist.py
+++ b/tests/test_whitelist.py
@@ -38,6 +38,13 @@ def test_has_white_tags() -> None:
check_if_has_tag(reader, feed, "whitelist_title")
check_if_has_tag(reader, feed, "whitelist_summary")
check_if_has_tag(reader, feed, "whitelist_content")
+ check_if_has_tag(reader, feed, "whitelist_author")
+
+ # Test regex whitelist tags
+ check_if_has_tag(reader, feed, "regex_whitelist_title")
+ check_if_has_tag(reader, feed, "regex_whitelist_summary")
+ check_if_has_tag(reader, feed, "regex_whitelist_content")
+ check_if_has_tag(reader, feed, "regex_whitelist_author")
# Clean up
reader.delete_feed(feed_url)
@@ -109,3 +116,67 @@ def test_should_be_sent() -> None:
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
reader.delete_tag(feed, "whitelist_author")
assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+
+def test_regex_should_be_sent() -> None:
+ """Test the regex filtering functionality for whitelist."""
+ reader: Reader = get_reader()
+
+ # Add feed and update entries
+ reader.add_feed(feed_url)
+ feed: Feed = reader.get_feed(feed_url)
+ reader.update_feeds()
+
+ # Get first entry
+ first_entry: list[Entry] = []
+ entries: Iterable[Entry] = reader.get_entries(feed=feed)
+ assert entries is not None, "Entries should not be None"
+ for entry in entries:
+ first_entry.append(entry)
+ break
+ assert len(first_entry) == 1, "First entry should be added"
+
+ # Test entry without any regex whitelists
+ assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+ # Test regex whitelist for title
+ reader.set_tag(feed, "regex_whitelist_title", r"fvnnn\w+") # pyright: ignore[reportArgumentType]
+ assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex title match"
+ reader.delete_tag(feed, "regex_whitelist_title")
+ assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+ # Test regex whitelist for summary
+ reader.set_tag(feed, "regex_whitelist_summary", r"ffdnfdn\w+") # pyright: ignore[reportArgumentType]
+ assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex summary match"
+ reader.delete_tag(feed, "regex_whitelist_summary")
+ assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+ # Test regex whitelist for content
+ reader.set_tag(feed, "regex_whitelist_content", r"ffdnfdnfdn\w+") # pyright: ignore[reportArgumentType]
+ assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex content match"
+ reader.delete_tag(feed, "regex_whitelist_content")
+ assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+ # Test regex whitelist for author
+ reader.set_tag(feed, "regex_whitelist_author", r"TheLovinator\d*") # pyright: ignore[reportArgumentType]
+ assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with regex author match"
+ reader.delete_tag(feed, "regex_whitelist_author")
+ assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+ # Test invalid regex pattern (should not raise an exception)
+ reader.set_tag(feed, "regex_whitelist_title", r"[incomplete") # pyright: ignore[reportArgumentType]
+ assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent with invalid regex"
+ reader.delete_tag(feed, "regex_whitelist_title")
+
+ # Test multiple regex patterns separated by commas
+ reader.set_tag(feed, "regex_whitelist_author", r"pattern1,TheLovinator\d*,pattern3") # pyright: ignore[reportArgumentType]
+ assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with one matching pattern in list"
+ reader.delete_tag(feed, "regex_whitelist_author")
+ assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
+
+ # Test newline-separated regex patterns
+ newline_patterns = "pattern1\nTheLovinator\\d*\npattern3"
+ reader.set_tag(feed, "regex_whitelist_author", newline_patterns) # pyright: ignore[reportArgumentType]
+ assert should_be_sent(reader, first_entry[0]) is True, "Entry should be sent with newline-separated patterns"
+ reader.delete_tag(feed, "regex_whitelist_author")
+ assert should_be_sent(reader, first_entry[0]) is False, "Entry should not be sent"
From 97d06ddb434cb87d9f9ae4a447985b153fcdbc59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Thu, 3 Apr 2025 06:20:01 +0200
Subject: [PATCH 005/165] =?UTF-8?q?Embed=20YouTube=20videos=20in=20/feed?=
=?UTF-8?q?=20HTML.=20Strong=20code,=20many=20bananas!=20=F0=9F=A6=8D?=
=?UTF-8?q?=F0=9F=A6=8D=F0=9F=A6=8D=F0=9F=A6=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
discord_rss_bot/feeds.py | 20 +++++
discord_rss_bot/main.py | 45 +++++++++++
discord_rss_bot/templates/feed.html | 4 +
tests/test_feeds.py | 119 +++++++++++++++++++++++++++-
4 files changed, 187 insertions(+), 1 deletion(-)
diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py
index ccb0a14..46c6e50 100644
--- a/discord_rss_bot/feeds.py
+++ b/discord_rss_bot/feeds.py
@@ -67,6 +67,10 @@ def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) ->
logger.exception("Error getting should_send_embed tag for feed: %s", entry.feed.url)
should_send_embed = True
+ # YouTube feeds should never use embeds
+ if is_youtube_feed(entry.feed.url):
+ should_send_embed = False
+
if should_send_embed:
webhook = create_embed_webhook(webhook_url, entry)
else:
@@ -295,6 +299,18 @@ def execute_webhook(webhook: DiscordWebhook, entry: Entry) -> None:
logger.info("Sent entry to Discord: %s", entry.id)
+def is_youtube_feed(feed_url: str) -> bool:
+ """Check if the feed is a YouTube feed.
+
+ Args:
+ feed_url: The feed URL to check.
+
+ Returns:
+ bool: True if the feed is a YouTube feed, False otherwise.
+ """
+ return "youtube.com/feeds/videos.xml" in feed_url
+
+
def should_send_embed_check(reader: Reader, entry: Entry) -> bool:
"""Check if we should send an embed to Discord.
@@ -305,6 +321,10 @@ def should_send_embed_check(reader: Reader, entry: Entry) -> bool:
Returns:
bool: True if we should send an embed, False otherwise.
"""
+ # YouTube feeds should never use embeds - only links
+ if is_youtube_feed(entry.feed.url):
+ return False
+
try:
should_send_embed = bool(reader.get_tag(entry.feed, "should_send_embed"))
except TagNotFoundError:
diff --git a/discord_rss_bot/main.py b/discord_rss_bot/main.py
index a7c6510..00349ac 100644
--- a/discord_rss_bot/main.py
+++ b/discord_rss_bot/main.py
@@ -732,6 +732,27 @@ def create_html_for_feed(entries: Iterable[Entry]) -> str:
entry_id: str = urllib.parse.quote(entry.id)
to_discord_html: str = f"Send to Discord"
+
+ # Check if this is a YouTube feed entry and the entry has a link
+ is_youtube_feed = "youtube.com/feeds/videos.xml" in entry.feed.url
+ video_embed_html = ""
+
+ if is_youtube_feed and entry.link:
+ # Extract the video ID and create an embed if possible
+ video_id: str | None = extract_youtube_video_id(entry.link)
+ if video_id:
+ video_embed_html: str = f"""
+
+
+
+ """
+ # Don't use the first image if we have a video embed
+ first_image = ""
+
image_html: str = f"" if first_image else ""
html += f"""
Hello there!
+
You need to add a webhook here to get started. After that, you can
add feeds here. You can find both of these links in the navigation bar
above.
@@ -79,6 +100,7 @@
Thanks!
{% endif %}
+
{% if broken_feeds %}
@@ -103,6 +125,7 @@
{% endif %}
+
{% if feeds_without_attached_webhook %}
diff --git a/tests/test_feeds.py b/tests/test_feeds.py
index 037711b..8fa6c4b 100644
--- a/tests/test_feeds.py
+++ b/tests/test_feeds.py
@@ -10,6 +10,7 @@ import pytest
from reader import Feed, Reader, make_reader
from discord_rss_bot.feeds import (
+ extract_domain,
is_youtube_feed,
send_entry_to_discord,
send_to_discord,
@@ -202,3 +203,57 @@ def test_send_entry_to_discord_youtube_feed(
# Verify execute_webhook was called
mock_execute_webhook.assert_called_once_with(mock_webhook, mock_entry)
+
+
+def test_extract_domain_youtube_feed() -> None:
+ """Test extract_domain for YouTube feeds."""
+ url: str = "https://www.youtube.com/feeds/videos.xml?channel_id=123456"
+ assert extract_domain(url) == "YouTube", "YouTube feeds should return 'YouTube' as the domain."
+
+
+def test_extract_domain_reddit_feed() -> None:
+ """Test extract_domain for Reddit feeds."""
+ url: str = "https://www.reddit.com/r/Python/.rss"
+ assert extract_domain(url) == "Reddit", "Reddit feeds should return 'Reddit' as the domain."
+
+
+def test_extract_domain_github_feed() -> None:
+ """Test extract_domain for GitHub feeds."""
+ url: str = "https://www.github.com/user/repo"
+ assert extract_domain(url) == "GitHub", "GitHub feeds should return 'GitHub' as the domain."
+
+
+def test_extract_domain_custom_domain() -> None:
+ """Test extract_domain for custom domains."""
+ url: str = "https://www.example.com/feed"
+ assert extract_domain(url) == "Example", "Custom domains should return the capitalized first part of the domain."
+
+
+def test_extract_domain_no_www_prefix() -> None:
+ """Test extract_domain removes 'www.' prefix."""
+ url: str = "https://www.example.com/feed"
+ assert extract_domain(url) == "Example", "The 'www.' prefix should be removed from the domain."
+
+
+def test_extract_domain_no_tld() -> None:
+ """Test extract_domain for domains without a TLD."""
+ url: str = "https://localhost/feed"
+ assert extract_domain(url) == "Localhost", "Domains without a TLD should return the capitalized domain."
+
+
+def test_extract_domain_invalid_url() -> None:
+ """Test extract_domain for invalid URLs."""
+ url: str = "not-a-valid-url"
+ assert extract_domain(url) == "Other", "Invalid URLs should return 'Other' as the domain."
+
+
+def test_extract_domain_empty_url() -> None:
+ """Test extract_domain for empty URLs."""
+ url: str = ""
+ assert extract_domain(url) == "Other", "Empty URLs should return 'Other' as the domain."
+
+
+def test_extract_domain_special_characters() -> None:
+ """Test extract_domain for URLs with special characters."""
+ url: str = "https://www.ex-ample.com/feed"
+ assert extract_domain(url) == "Ex-ample", "Domains with special characters should return the capitalized domain."
diff --git a/tests/test_main.py b/tests/test_main.py
index 59bd109..c86901f 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -45,7 +45,7 @@ def test_search() -> None:
# Check that the feed was added.
response = client.get(url="/")
assert response.status_code == 200, f"Failed to get /: {response.text}"
- assert feed_url in response.text, f"Feed not found in /: {response.text}"
+ assert encoded_feed_url(feed_url) in response.text, f"Feed not found in /: {response.text}"
# Search for an entry.
response: Response = client.get(url="/search/?query=a")
@@ -85,7 +85,7 @@ def test_create_feed() -> None:
# Check that the feed was added.
response = client.get(url="/")
assert response.status_code == 200, f"Failed to get /: {response.text}"
- assert feed_url in response.text, f"Feed not found in /: {response.text}"
+ assert encoded_feed_url(feed_url) in response.text, f"Feed not found in /: {response.text}"
def test_get() -> None:
@@ -103,7 +103,7 @@ def test_get() -> None:
# Check that the feed was added.
response = client.get("/")
assert response.status_code == 200, f"Failed to get /: {response.text}"
- assert feed_url in response.text, f"Feed not found in /: {response.text}"
+ assert encoded_feed_url(feed_url) in response.text, f"Feed not found in /: {response.text}"
response: Response = client.get(url="/add")
assert response.status_code == 200, f"/add failed: {response.text}"
@@ -157,7 +157,7 @@ def test_pause_feed() -> None:
# Check that the feed was paused.
response = client.get(url="/")
assert response.status_code == 200, f"Failed to get /: {response.text}"
- assert feed_url in response.text, f"Feed not found in /: {response.text}"
+ assert encoded_feed_url(feed_url) in response.text, f"Feed not found in /: {response.text}"
def test_unpause_feed() -> None:
@@ -184,7 +184,7 @@ def test_unpause_feed() -> None:
# Check that the feed was unpaused.
response = client.get(url="/")
assert response.status_code == 200, f"Failed to get /: {response.text}"
- assert feed_url in response.text, f"Feed not found in /: {response.text}"
+ assert encoded_feed_url(feed_url) in response.text, f"Feed not found in /: {response.text}"
def test_remove_feed() -> None:
From cd0f63d59a99224a915c23112b7bcf777011cfb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Wed, 16 Apr 2025 13:32:31 +0200
Subject: [PATCH 007/165] Add tldextract for improved domain extraction and add
new tests for extract_domain function
---
discord_rss_bot/feeds.py | 11 +++++------
pyproject.toml | 1 +
tests/test_feeds.py | 19 +++++++++++++++++++
3 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py
index 83ac2fd..7852b0d 100644
--- a/discord_rss_bot/feeds.py
+++ b/discord_rss_bot/feeds.py
@@ -7,6 +7,7 @@ import re
from typing import TYPE_CHECKING
from urllib.parse import ParseResult, urlparse
+import tldextract
from discord_webhook import DiscordEmbed, DiscordWebhook
from fastapi import HTTPException
from reader import Entry, EntryNotFoundError, Feed, FeedExistsError, Reader, ReaderError, StorageError, TagNotFoundError
@@ -70,12 +71,10 @@ def extract_domain(url: str) -> str: # noqa: PLR0911
if domain in domain_mapping:
return domain_mapping[domain]
- # For other domains, capitalize the first part before the TLD
- parts: list[str] = domain.split(".")
- min_domain_parts = 2
- if len(parts) >= min_domain_parts:
- return parts[0].capitalize()
-
+ # Use tldextract to get the domain (SLD)
+ ext = tldextract.extract(url)
+ if ext.domain:
+ return ext.domain.capitalize()
return domain.capitalize()
except (ValueError, AttributeError, TypeError) as e:
logger.warning("Error extracting domain from %s: %s", url, e)
diff --git a/pyproject.toml b/pyproject.toml
index 21ab35a..f5758e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
"python-multipart",
"reader",
"sentry-sdk[fastapi]",
+ "tldextract",
"uvicorn",
]
diff --git a/tests/test_feeds.py b/tests/test_feeds.py
index 8fa6c4b..2b3a2b4 100644
--- a/tests/test_feeds.py
+++ b/tests/test_feeds.py
@@ -257,3 +257,22 @@ def test_extract_domain_special_characters() -> None:
"""Test extract_domain for URLs with special characters."""
url: str = "https://www.ex-ample.com/feed"
assert extract_domain(url) == "Ex-ample", "Domains with special characters should return the capitalized domain."
+
+
+@pytest.mark.parametrize(
+ argnames=("url", "expected"),
+ argvalues=[
+ ("https://blog.something.com", "Something"),
+ ("https://www.something.com", "Something"),
+ ("https://subdomain.example.co.uk", "Example"),
+ ("https://github.com/user/repo", "GitHub"),
+ ("https://youtube.com/feeds/videos.xml?channel_id=abc", "YouTube"),
+ ("https://reddit.com/r/python/.rss", "Reddit"),
+ ("", "Other"),
+ ("not a url", "Other"),
+ ("https://www.example.com", "Example"),
+ ("https://foo.bar.baz.com", "Baz"),
+ ],
+)
+def test_extract_domain(url: str, expected: str) -> None:
+ assert extract_domain(url) == expected
From e33b331564732b9b32f89651dd933db31a5dcc18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Wed, 16 Apr 2025 13:33:18 +0200
Subject: [PATCH 008/165] Update ruff-pre-commit to version 0.11.5
---
.pre-commit-config.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 908367d..867131e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
# An extremely fast Python linter and formatter.
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.11.2
+ rev: v0.11.5
hooks:
- id: ruff-format
- id: ruff
From 544ef6dca3820a65c3d61e1c19a07f29e720f068 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Sat, 3 May 2025 19:42:20 +0200
Subject: [PATCH 009/165] Update ruff-pre-commit to version 0.11.8
---
.pre-commit-config.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 867131e..aca9273 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
# An extremely fast Python linter and formatter.
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.11.5
+ rev: v0.11.8
hooks:
- id: ruff-format
- id: ruff
From ffd6f2f9f25150079635035e879c0e75a4b88586 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Sun, 4 May 2025 03:48:22 +0200
Subject: [PATCH 010/165] Add Hoyolab API integration
---
.vscode/settings.json | 2 +
README.md | 16 ++-
discord_rss_bot/custom_message.py | 13 +-
discord_rss_bot/feeds.py | 28 ++++-
discord_rss_bot/hoyolab_api.py | 193 ++++++++++++++++++++++++++++++
tests/test_hoyolab_api.py | 39 ++++++
6 files changed, 276 insertions(+), 15 deletions(-)
create mode 100644 discord_rss_bot/hoyolab_api.py
create mode 100644 tests/test_hoyolab_api.py
diff --git a/.vscode/settings.json b/.vscode/settings.json
index f929fff..85832f8 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -2,6 +2,8 @@
"cSpell.words": [
"botuser",
"Genshins",
+ "healthcheck",
+ "Hoyolab",
"levelname",
"Lovinator",
"markdownified",
diff --git a/README.md b/README.md
index 849fb98..8232dea 100644
--- a/README.md
+++ b/README.md
@@ -2,8 +2,20 @@
Subscribe to RSS feeds and get updates to a Discord webhook.
-> [!NOTE]
-> You should look at [MonitoRSS](https://github.com/synzen/monitorss) for a more feature-rich project.
+## Features
+
+- Subscribe to RSS feeds and get updates to a Discord webhook.
+- Web interface to manage subscriptions.
+- Customizable message format for each feed.
+- Choose between Discord embed or plain text.
+- Regex filters for RSS feeds.
+- Blacklist/whitelist words in the title/description/author/etc.
+- Gets extra information from APIs if available, currently for:
+ - [https://feeds.c3kay.de/](https://feeds.c3kay.de/)
+ - Genshin Impact News
+ - Honkai Impact 3rd News
+    - Honkai: Star Rail News
+ - Zenless Zone Zero News
## Installation
diff --git a/discord_rss_bot/custom_message.py b/discord_rss_bot/custom_message.py
index 9cb03e5..d3ca74d 100644
--- a/discord_rss_bot/custom_message.py
+++ b/discord_rss_bot/custom_message.py
@@ -152,14 +152,7 @@ def get_first_image(summary: str | None, content: str | None) -> str:
logger.warning("Invalid URL: %s", src)
continue
- # Genshins first image is a divider, so we ignore it.
- # https://hyl-static-res-prod.hoyolab.com/divider_config/PC/line_3.png
- skip_images: list[str] = [
- "https://img-os-static.hoyolab.com/divider_config/",
- "https://hyl-static-res-prod.hoyolab.com/divider_config/",
- ]
- if not str(image.attrs["src"]).startswith(tuple(skip_images)):
- return str(image.attrs["src"])
+ return str(image.attrs["src"])
if summary and (images := BeautifulSoup(summary, features="lxml").find_all("img")):
for image in images:
if not isinstance(image, Tag) or "src" not in image.attrs:
@@ -170,9 +163,7 @@ def get_first_image(summary: str | None, content: str | None) -> str:
logger.warning("Invalid URL: %s", image.attrs["src"])
continue
- # Genshins first image is a divider, so we ignore it.
- if not str(image.attrs["src"]).startswith("https://img-os-static.hoyolab.com/divider_config"):
- return str(image.attrs["src"])
+ return str(image.attrs["src"])
return ""
diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py
index 7852b0d..90350b0 100644
--- a/discord_rss_bot/feeds.py
+++ b/discord_rss_bot/feeds.py
@@ -4,7 +4,7 @@ import datetime
import logging
import pprint
import re
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
from urllib.parse import ParseResult, urlparse
import tldextract
@@ -20,6 +20,12 @@ from discord_rss_bot.custom_message import (
)
from discord_rss_bot.filter.blacklist import entry_should_be_skipped
from discord_rss_bot.filter.whitelist import has_white_tags, should_be_sent
+from discord_rss_bot.hoyolab_api import (
+ create_hoyolab_webhook,
+ extract_post_id_from_hoyolab_url,
+ fetch_hoyolab_post,
+ is_c3kay_feed,
+)
from discord_rss_bot.is_url_valid import is_url_valid
from discord_rss_bot.missing_tags import add_missing_tags
from discord_rss_bot.settings import default_custom_message, get_reader
@@ -81,7 +87,7 @@ def extract_domain(url: str) -> str: # noqa: PLR0911
return "Other"
-def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) -> str | None:
+def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) -> str | None: # noqa: PLR0912
"""Send a single entry to Discord.
Args:
@@ -99,6 +105,24 @@ def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) ->
if not webhook_url:
return "No webhook URL found."
+ # Check if this is a c3kay feed
+ if is_c3kay_feed(entry.feed.url):
+ entry_link: str | None = entry.link
+ if entry_link:
+ post_id: str | None = extract_post_id_from_hoyolab_url(entry_link)
+ if post_id:
+ post_data: dict[str, Any] | None = fetch_hoyolab_post(post_id)
+ if post_data:
+ webhook = create_hoyolab_webhook(webhook_url, entry, post_data)
+ execute_webhook(webhook, entry)
+ return None
+ logger.warning(
+ "Failed to create Hoyolab webhook for feed %s, falling back to regular processing",
+ entry.feed.url,
+ )
+ else:
+ logger.warning("No entry link found for feed %s, falling back to regular processing", entry.feed.url)
+
webhook_message: str = ""
# Try to get the custom message for the feed. If the user has none, we will use the default message.
diff --git a/discord_rss_bot/hoyolab_api.py b/discord_rss_bot/hoyolab_api.py
new file mode 100644
index 0000000..cb1ed71
--- /dev/null
+++ b/discord_rss_bot/hoyolab_api.py
@@ -0,0 +1,193 @@
+from __future__ import annotations
+
+import contextlib
+import json
+import logging
+import re
+from typing import TYPE_CHECKING, Any
+
+import requests
+from discord_webhook import DiscordEmbed, DiscordWebhook
+
+if TYPE_CHECKING:
+ from reader import Entry
+
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+def is_c3kay_feed(feed_url: str) -> bool:
+ """Check if the feed is from c3kay.de.
+
+ Args:
+ feed_url: The feed URL to check.
+
+ Returns:
+ bool: True if the feed is from c3kay.de, False otherwise.
+ """
+ return "feeds.c3kay.de" in feed_url
+
+
+def extract_post_id_from_hoyolab_url(url: str) -> str | None:
+ """Extract the post ID from a Hoyolab URL.
+
+ Args:
+ url: The Hoyolab URL to extract the post ID from.
+ For example: https://www.hoyolab.com/article/38588239
+
+ Returns:
+ str | None: The post ID if found, None otherwise.
+ """
+ try:
+ match: re.Match[str] | None = re.search(r"/article/(\d+)", url)
+ if match:
+ return match.group(1)
+ except (ValueError, AttributeError, TypeError) as e:
+ logger.warning("Error extracting post ID from Hoyolab URL %s: %s", url, e)
+
+ return None
+
+
+def fetch_hoyolab_post(post_id: str) -> dict[str, Any] | None:
+ """Fetch post data from the Hoyolab API.
+
+ Args:
+ post_id: The post ID to fetch.
+
+ Returns:
+ dict[str, Any] | None: The post data if successful, None otherwise.
+ """
+ if not post_id:
+ return None
+
+ http_ok = 200
+ try:
+ url: str = f"https://bbs-api-os.hoyolab.com/community/post/wapi/getPostFull?post_id={post_id}"
+ response: requests.Response = requests.get(url, timeout=10)
+
+ if response.status_code == http_ok:
+ data: dict[str, Any] = response.json()
+ if data.get("retcode") == 0 and "data" in data and "post" in data["data"]:
+ return data["data"]["post"]
+
+ logger.warning("Failed to fetch Hoyolab post %s: %s", post_id, response.text)
+ except (requests.RequestException, ValueError):
+ logger.exception("Error fetching Hoyolab post %s", post_id)
+
+ return None
+
+
+def create_hoyolab_webhook(webhook_url: str, entry: Entry, post_data: dict[str, Any]) -> DiscordWebhook: # noqa: C901, PLR0912, PLR0914, PLR0915
+ """Create a webhook with data from the Hoyolab API.
+
+ Args:
+ webhook_url: The webhook URL.
+ entry: The entry to send to Discord.
+ post_data: The post data from the Hoyolab API.
+
+ Returns:
+ DiscordWebhook: The webhook with the embed.
+ """
+ entry_link: str = entry.link or entry.feed.url
+ webhook = DiscordWebhook(url=webhook_url, rate_limit_retry=True)
+
+ # Extract relevant data from the post
+ post: dict[str, Any] = post_data.get("post", {})
+ subject: str = post.get("subject", "")
+ content: str = post.get("content", "{}")
+
+ logger.debug("Post subject: %s", subject)
+ logger.debug("Post content: %s", content)
+
+ content_data: dict[str, str] = {}
+ with contextlib.suppress(json.JSONDecodeError, ValueError):
+ content_data = json.loads(content)
+
+ logger.debug("Content data: %s", content_data)
+
+ description: str = content_data.get("describe", "")
+ if not description:
+ description = post.get("desc", "")
+
+ # Create the embed
+ discord_embed = DiscordEmbed()
+
+ # Set title and description
+ discord_embed.set_title(subject)
+ discord_embed.set_url(entry_link)
+
+ # Get post.image_list
+ image_list: list[dict[str, Any]] = post_data.get("image_list", [])
+ if image_list:
+ image_url: str = str(image_list[0].get("url", ""))
+ image_height: int = int(image_list[0].get("height", 1080))
+ image_width: int = int(image_list[0].get("width", 1920))
+
+ logger.debug("Image URL: %s, Height: %s, Width: %s", image_url, image_height, image_width)
+ discord_embed.set_image(url=image_url, height=image_height, width=image_width)
+
+ video: dict[str, str | int | bool] = post_data.get("video", {})
+ if video and video.get("url"):
+ video_url: str = str(video.get("url", ""))
+ logger.debug("Video URL: %s", video_url)
+ with contextlib.suppress(requests.RequestException):
+ video_response: requests.Response = requests.get(video_url, stream=True, timeout=10)
+ if video_response.ok:
+ webhook.add_file(
+ file=video_response.content,
+ filename=f"{entry.id}.mp4",
+ )
+
+ game = post_data.get("game", {})
+
+ if game and game.get("color"):
+ game_color = str(game.get("color", ""))
+ discord_embed.set_color(game_color.removeprefix("#"))
+
+ user: dict[str, str | int | bool] = post_data.get("user", {})
+ author_name: str = str(user.get("nickname", ""))
+ avatar_url: str = str(user.get("avatar_url", ""))
+ if author_name:
+ webhook.avatar_url = avatar_url
+ webhook.username = author_name
+
+ classification = post_data.get("classification", {})
+
+ if classification and classification.get("name"):
+ footer = str(classification.get("name", ""))
+ discord_embed.set_footer(text=footer)
+
+ webhook.add_embed(discord_embed)
+
+ # Only show Youtube URL if available
+ structured_content: str = post.get("structured_content", "")
+ if structured_content: # noqa: PLR1702
+ try:
+ structured_content_data: list[dict[str, Any]] = json.loads(structured_content)
+ for item in structured_content_data:
+ if item.get("insert") and isinstance(item["insert"], dict):
+ video_url: str = str(item["insert"].get("video", ""))
+ if video_url:
+ video_id_match: re.Match[str] | None = re.search(r"embed/([a-zA-Z0-9_-]+)", video_url)
+ if video_id_match:
+ video_id: str = video_id_match.group(1)
+ logger.debug("Video ID: %s", video_id)
+ webhook.content = f"https://www.youtube.com/watch?v={video_id}"
+ webhook.remove_embeds()
+
+ except (json.JSONDecodeError, ValueError) as e:
+ logger.warning("Error parsing structured content: %s", e)
+
+ event_start_date: str = post.get("event_start_date", "")
+ if event_start_date and event_start_date != "0":
+ discord_embed.add_embed_field(name="Start", value=f"<t:{event_start_date}:R>")
+
+ event_end_date: str = post.get("event_end_date", "")
+ if event_end_date and event_end_date != "0":
+ discord_embed.add_embed_field(name="End", value=f"<t:{event_end_date}:R>")
+
+ created_at: str = post.get("created_at", "")
+ if created_at and created_at != "0":
+ discord_embed.set_timestamp(timestamp=created_at)
+
+ return webhook
diff --git a/tests/test_hoyolab_api.py b/tests/test_hoyolab_api.py
new file mode 100644
index 0000000..60c83ae
--- /dev/null
+++ b/tests/test_hoyolab_api.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from discord_rss_bot.hoyolab_api import extract_post_id_from_hoyolab_url
+
+
+class TestExtractPostIdFromHoyolabUrl:
+ def test_extract_post_id_from_article_url(self) -> None:
+ """Test extracting post ID from a direct article URL."""
+ test_cases: list[str] = [
+ "https://www.hoyolab.com/article/38588239",
+ "http://hoyolab.com/article/12345",
+ "https://www.hoyolab.com/article/987654321/comments",
+ ]
+
+ expected_ids: list[str] = ["38588239", "12345", "987654321"]
+
+ for url, expected_id in zip(test_cases, expected_ids, strict=False):
+ assert extract_post_id_from_hoyolab_url(url) == expected_id
+
+ def test_url_without_post_id(self) -> None:
+ """Test with a URL that doesn't have a post ID."""
+ test_cases: list[str] = [
+ "https://www.hoyolab.com/community",
+ ]
+
+ for url in test_cases:
+ assert extract_post_id_from_hoyolab_url(url) is None
+
+ def test_edge_cases(self) -> None:
+ """Test edge cases like None, empty string, and malformed URLs."""
+ test_cases: list[str | None] = [
+ None,
+ "",
+ "not_a_url",
+ "http:/", # Malformed URL
+ ]
+
+ for url in test_cases:
+ assert extract_post_id_from_hoyolab_url(url) is None # type: ignore
From d8247fec01fd86137f2a9b979a46618deac41768 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Sun, 4 May 2025 04:08:39 +0200
Subject: [PATCH 011/165] Replace GitHub Actions build workflow with Gitea
workflow
---
.gitea/workflows/build.yml | 98 +++++++++++++++++++++++++++++++++++++
.github/workflows/build.yml | 64 ------------------------
2 files changed, 98 insertions(+), 64 deletions(-)
create mode 100644 .gitea/workflows/build.yml
delete mode 100644 .github/workflows/build.yml
diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml
new file mode 100644
index 0000000..f340331
--- /dev/null
+++ b/.gitea/workflows/build.yml
@@ -0,0 +1,98 @@
+---
+name: Test and build Docker image
+on:
+ push:
+ branches:
+ - master
+ pull_request:
+ workflow_dispatch:
+ schedule:
+ - cron: "@daily"
+
+env:
+ TEST_WEBHOOK_URL: ${{ secrets.TEST_WEBHOOK_URL }}
+
+jobs:
+ docker:
+ runs-on: ubuntu-latest
+ steps:
+ # GitHub Container Registry
+ - uses: https://github.com/docker/login-action@v3
+ if: github.event_name != 'pull_request'
+ with:
+ registry: ghcr.io
+ username: thelovinator1
+ password: ${{ secrets.PACKAGES_WRITE_GITHUB_TOKEN }}
+
+ # Gitea Container Registry
+ - uses: https://github.com/docker/login-action@v3
+ if: github.event_name != 'pull_request'
+ with:
+ registry: git.lovinator.space
+ username: thelovinator
+ password: ${{ secrets.PACKAGES_WRITE_GITEA_TOKEN }}
+
+ # Download the latest commit from the master branch
+ - uses: https://github.com/actions/checkout@v4
+
+ # Set up QEMU
+ - id: qemu
+ uses: https://github.com/docker/setup-qemu-action@v3
+ with:
+ image: tonistiigi/binfmt:master
+ platforms: linux/amd64,linux/arm64
+ cache-image: false
+
+ # Set up Buildx so we can build multi-arch images
+ - uses: https://github.com/docker/setup-buildx-action@v3
+
+ # Install the latest version of ruff
+ - uses: https://github.com/astral-sh/ruff-action@v3
+ with:
+ version: "latest"
+
+ # Lint the Python code using ruff
+ - run: ruff check --exit-non-zero-on-fix --verbose
+
+ # Check if the Python code needs formatting
+ - run: ruff format --check --verbose
+
+ # Lint Dockerfile
+ - run: docker build --check .
+
+ # Set up Python 3.13
+ - uses: actions/setup-python@v5
+ with:
+ python-version: 3.13
+
+ # Install dependencies
+ - uses: astral-sh/setup-uv@v5
+ with:
+ version: "latest"
+ - run: uv sync --all-extras --all-groups
+
+ # Run tests
+ - run: uv run pytest
+
+ # Extract metadata (tags, labels) from Git reference and GitHub events for Docker
+ - id: meta
+ uses: https://github.com/docker/metadata-action@v5
+ env:
+ DOCKER_METADATA_ANNOTATIONS_LEVELS: manifest,index
+ with:
+ images: |
+ ghcr.io/thelovinator1/discord-rss-bot
+ git.lovinator.space/thelovinator/discord-rss-bot
+ tags: |
+ type=raw,value=latest,enable=${{ gitea.ref == format('refs/heads/{0}', 'master') }}
+ type=raw,value=master,enable=${{ gitea.ref == format('refs/heads/{0}', 'master') }}
+
+ # Build and push the Docker image
+ - uses: https://github.com/docker/build-push-action@v6
+ with:
+ context: .
+ platforms: linux/amd64,linux/arm64
+ push: ${{ gitea.event_name != 'pull_request' }}
+ labels: ${{ steps.meta.outputs.labels }}
+ tags: ${{ steps.meta.outputs.tags }}
+ annotations: ${{ steps.meta.outputs.annotations }}
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index 7f0ea6d..0000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,64 +0,0 @@
----
-name: Test and build Docker image
-on:
- push:
- pull_request:
- workflow_dispatch:
- schedule:
- - cron: "0 6 * * *"
-
-env:
- TEST_WEBHOOK_URL: ${{ secrets.TEST_WEBHOOK_URL }}
-
-jobs:
- test:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- - uses: actions/setup-python@v5
- with:
- python-version: 3.12
- - uses: astral-sh/setup-uv@v5
- with:
- version: "latest"
- - run: uv sync --all-extras --all-groups
- - run: uv run pytest
- ruff:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- - uses: astral-sh/ruff-action@v3
- with:
- version: "latest"
- - run: ruff check --exit-non-zero-on-fix --verbose
- - run: ruff format --check --verbose
-
- build:
- runs-on: ubuntu-latest
- permissions:
- contents: read
- packages: write
- if: github.event_name != 'pull_request'
- concurrency:
- group: ${{ github.workflow }}-${{ github.ref }}
- cancel-in-progress: true
- needs: [test, ruff]
- steps:
- - uses: actions/checkout@v4
- - uses: docker/setup-qemu-action@v3
- with:
- platforms: all
- - uses: docker/setup-buildx-action@v3
- - uses: docker/login-action@v3
- with:
- registry: ghcr.io
- username: ${{ github.repository_owner }}
- password: ${{ secrets.GITHUB_TOKEN }}
- - uses: docker/build-push-action@v6
- with:
- context: .
- platforms: linux/amd64, linux/arm64
- push: ${{ github.event_name != 'pull_request' }}
- tags: |
- ghcr.io/thelovinator1/discord-rss-bot:latest
- ghcr.io/thelovinator1/discord-rss-bot:master
From c3a11f55b068ca38ee6cffcb48dc96a593f6bc0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Sun, 4 May 2025 05:28:37 +0200
Subject: [PATCH 012/165] Update Docker healthcheck
---
.vscode/launch.json | 6 +++++-
Dockerfile | 1 +
docker-compose.yml | 2 +-
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 781b0bd..bb222ab 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -8,7 +8,11 @@
"module": "uvicorn",
"args": [
"discord_rss_bot.main:app",
- "--reload"
+ "--reload",
+ "--host",
+ "0.0.0.0",
+ "--port",
+ "5000",
],
"jinja": true,
"justMyCode": true
diff --git a/Dockerfile b/Dockerfile
index adaf76c..0905265 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,4 +12,5 @@ RUN --mount=type=cache,target=/root/.cache/uv \
COPY --chown=botuser:botuser discord_rss_bot/ /home/botuser/discord-rss-bot/discord_rss_bot/
EXPOSE 5000
VOLUME ["/home/botuser/.local/share/discord_rss_bot/"]
+HEALTHCHECK --interval=10m --timeout=5s CMD ["uv", "run", "./discord_rss_bot/healthcheck.py"]
CMD ["uv", "run", "uvicorn", "discord_rss_bot.main:app", "--host=0.0.0.0", "--port=5000", "--proxy-headers", "--forwarded-allow-ips='*'", "--log-level", "debug"]
diff --git a/docker-compose.yml b/docker-compose.yml
index 837ed0b..6b92975 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,7 +10,7 @@ services:
# - /Docker/Bots/discord-rss-bot:/home/botuser/.local/share/discord_rss_bot/
- data:/home/botuser/.local/share/discord_rss_bot/
healthcheck:
- test: ["CMD", "python", "discord_rss_bot/healthcheck.py"]
+ test: [ "CMD", "uv", "run", "./discord_rss_bot/healthcheck.py" ]
interval: 1m
timeout: 10s
retries: 3
From 7f9c934d08fc5861dc1c7e263c84e01d8cdb9c1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Sun, 4 May 2025 16:50:29 +0200
Subject: [PATCH 013/165] Also use custom feed stuff if sent from
send_to_discord
---
discord_rss_bot/feeds.py | 31 ++++++++++++++++++++++++++-----
1 file changed, 26 insertions(+), 5 deletions(-)
diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py
index 90350b0..203e7b5 100644
--- a/discord_rss_bot/feeds.py
+++ b/discord_rss_bot/feeds.py
@@ -289,7 +289,7 @@ def set_entry_as_read(reader: Reader, entry: Entry) -> None:
logger.exception("Error setting entry to read: %s", entry.id)
-def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = None, *, do_once: bool = False) -> None:
+def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = None, *, do_once: bool = False) -> None: # noqa: PLR0912
"""Send entries to Discord.
If response was not ok, we will log the error and mark the entry as unread, so it will be sent again next time.
@@ -320,6 +320,11 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non
continue
should_send_embed: bool = should_send_embed_check(reader, entry)
+
+ # Youtube feeds only need to send the link
+ if is_youtube_feed(entry.feed.url):
+ should_send_embed = False
+
if should_send_embed:
webhook = create_embed_webhook(webhook_url, entry)
else:
@@ -341,12 +346,28 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non
continue
# Check if the feed has a whitelist, and if it does, check if the entry is whitelisted.
- if has_white_tags(reader, entry.feed):
- if should_be_sent(reader, entry):
- execute_webhook(webhook, entry)
- return
+ if has_white_tags(reader, entry.feed) and not should_be_sent(reader, entry):
+ logger.info("Entry was not whitelisted: %s", entry.id)
continue
+ # Use a custom webhook for Hoyolab feeds.
+ if is_c3kay_feed(entry.feed.url):
+ entry_link: str | None = entry.link
+ if entry_link:
+ post_id: str | None = extract_post_id_from_hoyolab_url(entry_link)
+ if post_id:
+ post_data: dict[str, Any] | None = fetch_hoyolab_post(post_id)
+ if post_data:
+ webhook = create_hoyolab_webhook(webhook_url, entry, post_data)
+ execute_webhook(webhook, entry)
+ return
+ logger.warning(
+ "Failed to create Hoyolab webhook for feed %s, falling back to regular processing",
+ entry.feed.url,
+ )
+ else:
+ logger.warning("No entry link found for feed %s, falling back to regular processing", entry.feed.url)
+
# Send the entry to Discord as it is not blacklisted or feed has a whitelist.
execute_webhook(webhook, entry)
From 901d6cb1a6e0f14c9f360c549d3b7dcf7ed76681 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Mon, 5 May 2025 01:19:52 +0200
Subject: [PATCH 014/165] Honor 429 Too Many Requests and 503 Service
Unavailable responses
---
discord_rss_bot/feeds.py | 6 +++++-
discord_rss_bot/main.py | 4 ++--
discord_rss_bot/settings.py | 9 +++++++--
3 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py
index 203e7b5..a8388a9 100644
--- a/discord_rss_bot/feeds.py
+++ b/discord_rss_bot/feeds.py
@@ -2,6 +2,7 @@ from __future__ import annotations
import datetime
import logging
+import os
import pprint
import re
from typing import TYPE_CHECKING, Any
@@ -303,7 +304,10 @@ def send_to_discord(custom_reader: Reader | None = None, feed: Feed | None = Non
reader: Reader = get_reader() if custom_reader is None else custom_reader
# Check for new entries for every feed.
- reader.update_feeds()
+ reader.update_feeds(
+ scheduled=True,
+ workers=os.cpu_count() or 1,
+ )
# Loop through the unread entries.
entries: Iterable[Entry] = reader.get_entries(feed=feed, read=False)
diff --git a/discord_rss_bot/main.py b/discord_rss_bot/main.py
index 7ae706f..3103fe7 100644
--- a/discord_rss_bot/main.py
+++ b/discord_rss_bot/main.py
@@ -100,9 +100,9 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None]:
add_missing_tags(reader)
scheduler: AsyncIOScheduler = AsyncIOScheduler()
- # Update all feeds every 15 minutes.
+ # Run job every minute to check for new entries. Feeds will be checked every 15 minutes.
# TODO(TheLovinator): Make this configurable.
- scheduler.add_job(send_to_discord, "interval", minutes=15, next_run_time=datetime.now(tz=UTC))
+ scheduler.add_job(send_to_discord, "interval", minutes=1, next_run_time=datetime.now(tz=UTC))
scheduler.start()
logger.info("Scheduler started.")
yield
diff --git a/discord_rss_bot/settings.py b/discord_rss_bot/settings.py
index a99733e..d730b10 100644
--- a/discord_rss_bot/settings.py
+++ b/discord_rss_bot/settings.py
@@ -24,7 +24,7 @@ default_custom_embed: dict[str, str] = {
}
-@lru_cache
+@lru_cache(maxsize=1)
def get_reader(custom_location: Path | None = None) -> Reader:
"""Get the reader.
@@ -35,5 +35,10 @@ def get_reader(custom_location: Path | None = None) -> Reader:
The reader.
"""
db_location: Path = custom_location or Path(data_dir) / "db.sqlite"
+ reader: Reader = make_reader(url=str(db_location))
- return make_reader(url=str(db_location))
+ # https://reader.readthedocs.io/en/latest/api.html#reader.types.UpdateConfig
+ # Set the update interval to 15 minutes
+ reader.set_tag((), ".reader.update", {"interval": 15})
+
+ return reader
From 96bcd81191d589f69eb538417b93163486cc3c08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Sat, 17 May 2025 03:53:15 +0200
Subject: [PATCH 015/165] Use ATX headers instead of SETEXT
---
discord_rss_bot/custom_message.py | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/discord_rss_bot/custom_message.py b/discord_rss_bot/custom_message.py
index d3ca74d..99a7e11 100644
--- a/discord_rss_bot/custom_message.py
+++ b/discord_rss_bot/custom_message.py
@@ -68,8 +68,18 @@ def replace_tags_in_text_message(entry: Entry) -> str:
first_image: str = get_first_image(summary, content)
- summary = markdownify(html=summary, strip=["img", "table", "td", "tr", "tbody", "thead"], escape_misc=False)
- content = markdownify(html=content, strip=["img", "table", "td", "tr", "tbody", "thead"], escape_misc=False)
+ summary = markdownify(
+ html=summary,
+ strip=["img", "table", "td", "tr", "tbody", "thead"],
+ escape_misc=False,
+ heading_style="ATX",
+ )
+ content = markdownify(
+ html=content,
+ strip=["img", "table", "td", "tr", "tbody", "thead"],
+ escape_misc=False,
+ heading_style="ATX",
+ )
if "[https://" in content or "[https://www." in content:
content = content.replace("[https://", "[")
@@ -189,8 +199,18 @@ def replace_tags_in_embed(feed: Feed, entry: Entry) -> CustomEmbed:
first_image: str = get_first_image(summary, content)
- summary = markdownify(html=summary, strip=["img", "table", "td", "tr", "tbody", "thead"], escape_misc=False)
- content = markdownify(html=content, strip=["img", "table", "td", "tr", "tbody", "thead"], escape_misc=False)
+ summary = markdownify(
+ html=summary,
+ strip=["img", "table", "td", "tr", "tbody", "thead"],
+ escape_misc=False,
+ heading_style="ATX",
+ )
+ content = markdownify(
+ html=content,
+ strip=["img", "table", "td", "tr", "tbody", "thead"],
+ escape_misc=False,
+ heading_style="ATX",
+ )
if "[https://" in content or "[https://www." in content:
content = content.replace("[https://", "[")
From 2a6dbd33ddc331d4d73f25bb6315fe6d93af1979 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Hells=C3=A9n?=
Date: Sat, 17 May 2025 03:58:08 +0200
Subject: [PATCH 016/165] Add button for manually updating feed
---
discord_rss_bot/main.py | 23 +++++++++++++++++
discord_rss_bot/templates/feed.html | 2 ++
tests/test_main.py | 38 +++++++++++++++++++++++++++++
3 files changed, 63 insertions(+)
diff --git a/discord_rss_bot/main.py b/discord_rss_bot/main.py
index 3103fe7..4b4b631 100644
--- a/discord_rss_bot/main.py
+++ b/discord_rss_bot/main.py
@@ -921,6 +921,29 @@ async def remove_feed(feed_url: Annotated[str, Form()]):
return RedirectResponse(url="/", status_code=303)
+@app.get("/update", response_class=HTMLResponse)
+async def update_feed(request: Request, feed_url: str):
+ """Update a feed.
+
+ Args:
+ request: The request object.
+ feed_url: The feed URL to update.
+
+ Raises:
+ HTTPException: If the feed is not found.
+
+ Returns:
+ RedirectResponse: Redirect to the feed page.
+ """
+ try:
+ reader.update_feed(urllib.parse.unquote(feed_url))
+ except FeedNotFoundError as e:
+ raise HTTPException(status_code=404, detail="Feed not found") from e
+
+ logger.info("Manually updated feed: %s", feed_url)
+ return RedirectResponse(url="/feed?feed_url=" + urllib.parse.quote(feed_url), status_code=303)
+
+
@app.get("/search", response_class=HTMLResponse)
async def search(request: Request, query: str):
"""Get entries matching a full-text search query.
diff --git a/discord_rss_bot/templates/feed.html b/discord_rss_bot/templates/feed.html
index ce983ff..340a8a3 100644
--- a/discord_rss_bot/templates/feed.html
+++ b/discord_rss_bot/templates/feed.html
@@ -28,6 +28,8 @@