diff --git a/.vscode/settings.json b/.vscode/settings.json index f929fff..85832f8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,6 +2,8 @@ "cSpell.words": [ "botuser", "Genshins", + "healthcheck", + "Hoyolab", "levelname", "Lovinator", "markdownified", diff --git a/README.md b/README.md index 849fb98..8232dea 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,20 @@ Subscribe to RSS feeds and get updates to a Discord webhook. -> [!NOTE] -> You should look at [MonitoRSS](https://github.com/synzen/monitorss) for a more feature-rich project. +## Features + +- Subscribe to RSS feeds and get updates to a Discord webhook. +- Web interface to manage subscriptions. +- Customizable message format for each feed. +- Choose between Discord embed or plain text. +- Regex filters for RSS feeds. +- Blacklist/whitelist words in the title/description/author/etc. +- Gets extra information from APIs if available, currently for: + - [https://feeds.c3kay.de/](https://feeds.c3kay.de/) + - Genshin Impact News + - Honkai Impact 3rd News + - Honkai Starrail News + - Zenless Zone Zero News ## Installation diff --git a/discord_rss_bot/custom_message.py b/discord_rss_bot/custom_message.py index 9cb03e5..d3ca74d 100644 --- a/discord_rss_bot/custom_message.py +++ b/discord_rss_bot/custom_message.py @@ -152,14 +152,7 @@ def get_first_image(summary: str | None, content: str | None) -> str: logger.warning("Invalid URL: %s", src) continue - # Genshins first image is a divider, so we ignore it. 
- # https://hyl-static-res-prod.hoyolab.com/divider_config/PC/line_3.png - skip_images: list[str] = [ - "https://img-os-static.hoyolab.com/divider_config/", - "https://hyl-static-res-prod.hoyolab.com/divider_config/", - ] - if not str(image.attrs["src"]).startswith(tuple(skip_images)): - return str(image.attrs["src"]) + return str(image.attrs["src"]) if summary and (images := BeautifulSoup(summary, features="lxml").find_all("img")): for image in images: if not isinstance(image, Tag) or "src" not in image.attrs: @@ -170,9 +163,7 @@ def get_first_image(summary: str | None, content: str | None) -> str: logger.warning("Invalid URL: %s", image.attrs["src"]) continue - # Genshins first image is a divider, so we ignore it. - if not str(image.attrs["src"]).startswith("https://img-os-static.hoyolab.com/divider_config"): - return str(image.attrs["src"]) + return str(image.attrs["src"]) return "" diff --git a/discord_rss_bot/feeds.py b/discord_rss_bot/feeds.py index 7852b0d..90350b0 100644 --- a/discord_rss_bot/feeds.py +++ b/discord_rss_bot/feeds.py @@ -4,7 +4,7 @@ import datetime import logging import pprint import re -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from urllib.parse import ParseResult, urlparse import tldextract @@ -20,6 +20,12 @@ from discord_rss_bot.custom_message import ( ) from discord_rss_bot.filter.blacklist import entry_should_be_skipped from discord_rss_bot.filter.whitelist import has_white_tags, should_be_sent +from discord_rss_bot.hoyolab_api import ( + create_hoyolab_webhook, + extract_post_id_from_hoyolab_url, + fetch_hoyolab_post, + is_c3kay_feed, +) from discord_rss_bot.is_url_valid import is_url_valid from discord_rss_bot.missing_tags import add_missing_tags from discord_rss_bot.settings import default_custom_message, get_reader @@ -81,7 +87,7 @@ def extract_domain(url: str) -> str: # noqa: PLR0911 return "Other" -def send_entry_to_discord(entry: Entry, custom_reader: Reader | None = None) -> str | None: +def 
# Matches the numeric post ID in paths such as "/article/38588239".
_ARTICLE_ID_PATTERN: re.Pattern[str] = re.compile(r"/article/(\d+)")


def is_c3kay_feed(feed_url: str) -> bool:
    """Check if the feed is served from feeds.c3kay.de.

    The host part of the URL is compared instead of searching the whole string,
    so a URL that merely mentions ``feeds.c3kay.de`` in its path, query string or
    user-info part is not treated as a c3kay feed. The comparison is
    case-insensitive and also accepts URLs written without a scheme.

    Args:
        feed_url: The feed URL to check.

    Returns:
        bool: True if the feed host is feeds.c3kay.de, False otherwise.
    """
    # Imported locally so this function does not depend on the module's import block.
    from urllib.parse import urlparse  # noqa: PLC0415

    if not isinstance(feed_url, str) or not feed_url.strip():
        return False

    candidate: str = feed_url.strip()
    if "://" not in candidate:
        # urlparse only recognises the host when the URL has a scheme or starts with "//".
        candidate = f"//{candidate.lstrip('/')}"

    try:
        hostname: str | None = urlparse(candidate).hostname
    except ValueError:
        # Raised by urllib for malformed hosts, e.g. an unbalanced IPv6 bracket.
        return False

    # ``hostname`` is already lower-cased by urllib; drop a trailing root dot.
    return hostname is not None and hostname.rstrip(".") == "feeds.c3kay.de"


def extract_post_id_from_hoyolab_url(url: str) -> str | None:
    """Extract the post ID from a Hoyolab URL.

    Args:
        url: The Hoyolab URL to extract the post ID from.
            For example: https://www.hoyolab.com/article/38588239

    Returns:
        str | None: The post ID if found, None otherwise. Non-string input
            (for example None) also yields None.
    """
    # Guard explicitly instead of relying on re.search raising TypeError.
    if not isinstance(url, str):
        return None

    match: re.Match[str] | None = _ARTICLE_ID_PATTERN.search(url)
    return match.group(1) if match else None
def fetch_hoyolab_post(post_id: str) -> dict[str, Any] | None:
    """Fetch post data from the Hoyolab API.

    The response is only accepted when it is a JSON object with ``retcode`` 0
    whose ``data.post`` member is itself an object. Any other shape is logged
    and treated as a failure, so callers can fall back to regular processing
    instead of crashing on an unexpected payload.

    Args:
        post_id: The post ID to fetch.

    Returns:
        dict[str, Any] | None: The post data if successful, None otherwise.
    """
    if not post_id:
        return None

    url: str = "https://bbs-api-os.hoyolab.com/community/post/wapi/getPostFull"
    try:
        # Let requests build (and escape) the query string.
        response: requests.Response = requests.get(url, params={"post_id": post_id}, timeout=10)

        if response.status_code == requests.codes.ok:
            payload: Any = response.json()
            if isinstance(payload, dict) and payload.get("retcode") == 0:
                data: Any = payload.get("data")
                post: Any = data.get("post") if isinstance(data, dict) else None
                if isinstance(post, dict):
                    return post

        logger.warning("Failed to fetch Hoyolab post %s: %s", post_id, response.text)
    except (requests.RequestException, ValueError):
        # response.json() raises a ValueError subclass when the body is not JSON.
        logger.exception("Error fetching Hoyolab post %s", post_id)

    return None
# Discord rejects embed descriptions longer than this many characters.
_DISCORD_DESCRIPTION_LIMIT: int = 4096


def _positive_int(value: Any) -> int | None:
    """Return *value* as a positive int, or None if it is missing, zero or not numeric."""
    if isinstance(value, bool):
        return None
    try:
        number: int = int(value)
    except (TypeError, ValueError):
        return None
    return number if number > 0 else None


def _find_youtube_url(structured_content: Any) -> str | None:
    """Return a watch URL for the last YouTube embed found in *structured_content*, if any."""
    if not structured_content:
        return None

    try:
        items: Any = json.loads(structured_content)
    except (json.JSONDecodeError, ValueError, TypeError) as e:
        logger.warning("Error parsing structured content: %s", e)
        return None

    if not isinstance(items, list):
        return None

    youtube_url: str | None = None
    for item in items:
        insert: Any = item.get("insert") if isinstance(item, dict) else None
        if not isinstance(insert, dict):
            continue

        video_id_match: re.Match[str] | None = re.search(r"embed/([a-zA-Z0-9_-]+)", str(insert.get("video", "")))
        if video_id_match:
            video_id: str = video_id_match.group(1)
            logger.debug("Video ID: %s", video_id)
            # Later matches overwrite earlier ones, so the last embedded video wins.
            youtube_url = f"https://www.youtube.com/watch?v={video_id}"

    return youtube_url


def _attach_video_file(webhook: DiscordWebhook, video: Any, filename: str) -> None:
    """Download the post's video, if it has one, and attach it to *webhook* (best effort)."""
    video_url: str = str(video.get("url") or "") if isinstance(video, dict) else ""
    if not video_url:
        return

    logger.debug("Video URL: %s", video_url)
    # A failed download must not prevent the rest of the message from being sent.
    with contextlib.suppress(requests.RequestException), requests.get(video_url, stream=True, timeout=10) as response:
        if response.ok:
            webhook.add_file(file=response.content, filename=filename)


def create_hoyolab_webhook(webhook_url: str, entry: Entry, post_data: dict[str, Any]) -> DiscordWebhook:  # noqa: C901, PLR0912, PLR0914, PLR0915
    """Create a webhook with data from the Hoyolab API.

    The embed is fully populated (title, URL, description, image, colour,
    footer, event dates and timestamp) before it is attached to the webhook.
    If the post embeds a YouTube video, the webhook carries the watch URL as
    plain content instead of the embed.

    Args:
        webhook_url: The webhook URL.
        entry: The entry to send to Discord.
        post_data: The post data from the Hoyolab API.

    Returns:
        DiscordWebhook: The webhook with the embed, or with a YouTube link as content.
    """
    entry_link: str = entry.link or entry.feed.url
    webhook = DiscordWebhook(url=webhook_url, rate_limit_retry=True)

    # Extract relevant data from the post
    post: dict[str, Any] = post_data.get("post") or {}
    subject: str = post.get("subject", "")
    content: str = post.get("content", "{}")

    logger.debug("Post subject: %s", subject)
    logger.debug("Post content: %s", content)

    # ``content`` is parsed as JSON when possible; anything that is not a JSON object is ignored.
    content_data: Any = None
    with contextlib.suppress(json.JSONDecodeError, ValueError, TypeError):
        content_data = json.loads(content)
    if not isinstance(content_data, dict):
        content_data = {}

    logger.debug("Content data: %s", content_data)

    description: str = str(content_data.get("describe") or post.get("desc") or "")

    # Create the embed
    discord_embed = DiscordEmbed()

    # Set title and description
    discord_embed.set_title(subject)
    discord_embed.set_url(entry_link)
    if description:
        discord_embed.set_description(description[:_DISCORD_DESCRIPTION_LIMIT])

    # Get post.image_list
    image_list: Any = post_data.get("image_list") or []
    if isinstance(image_list, list) and image_list and isinstance(image_list[0], dict):
        first_image: dict[str, Any] = image_list[0]
        image_url: str = str(first_image.get("url", ""))
        image_height: int = _positive_int(first_image.get("height")) or 1080
        image_width: int = _positive_int(first_image.get("width")) or 1920

        logger.debug("Image URL: %s, Height: %s, Width: %s", image_url, image_height, image_width)
        discord_embed.set_image(url=image_url, height=image_height, width=image_width)

    _attach_video_file(webhook, post_data.get("video"), filename=f"{entry.id}.mp4")

    game: Any = post_data.get("game") or {}
    if isinstance(game, dict) and game.get("color"):
        game_color = str(game.get("color", ""))
        discord_embed.set_color(game_color.removeprefix("#"))

    user: Any = post_data.get("user") or {}
    if isinstance(user, dict):
        author_name: str = str(user.get("nickname") or "")
        avatar_url: str = str(user.get("avatar_url") or "")
        if author_name:
            webhook.avatar_url = avatar_url
            webhook.username = author_name

    classification: Any = post_data.get("classification") or {}
    if isinstance(classification, dict) and classification.get("name"):
        discord_embed.set_footer(text=str(classification.get("name", "")))

    # Event dates are presumably Unix timestamps in seconds (TODO confirm against the API);
    # they are rendered with Discord's timestamp markup so each client shows them in local time.
    event_start: int | None = _positive_int(post.get("event_start_date"))
    if event_start is not None:
        discord_embed.add_embed_field(name="Start", value=f"<t:{event_start}:R>")

    event_end: int | None = _positive_int(post.get("event_end_date"))
    if event_end is not None:
        discord_embed.add_embed_field(name="End", value=f"<t:{event_end}:R>")

    # Pass a number, not a string, so the embed library converts it to an ISO 8601 timestamp.
    created_at: int | None = _positive_int(post.get("created_at"))
    if created_at is not None:
        discord_embed.set_timestamp(timestamp=created_at)

    # Only show Youtube URL if available
    youtube_url: str | None = _find_youtube_url(post.get("structured_content", ""))
    if youtube_url:
        webhook.content = youtube_url
    else:
        # Attach the embed last so everything set above is part of the payload.
        webhook.add_embed(discord_embed)

    return webhook
class TestExtractPostIdFromHoyolabUrl:
    """Tests for extract_post_id_from_hoyolab_url."""

    def test_extract_post_id_from_article_url(self) -> None:
        """Test extracting post ID from a direct article URL."""
        # Each URL is paired with its expected ID so the two can never drift out of sync.
        test_cases: list[tuple[str, str]] = [
            ("https://www.hoyolab.com/article/38588239", "38588239"),
            ("http://hoyolab.com/article/12345", "12345"),
            ("https://www.hoyolab.com/article/987654321/comments", "987654321"),
        ]

        for url, expected_id in test_cases:
            assert extract_post_id_from_hoyolab_url(url) == expected_id, f"Unexpected post ID for {url!r}"

    def test_url_without_post_id(self) -> None:
        """Test with a URL that doesn't have a post ID."""
        test_cases: list[str] = [
            "https://www.hoyolab.com/community",
        ]

        for url in test_cases:
            assert extract_post_id_from_hoyolab_url(url) is None, f"Expected no post ID for {url!r}"

    def test_edge_cases(self) -> None:
        """Test edge cases like None, empty string, and malformed URLs."""
        test_cases: list[str | None] = [
            None,
            "",
            "not_a_url",
            "http:/",  # Malformed URL
        ]

        for url in test_cases:
            # None is passed on purpose to check that invalid input is tolerated.
            assert extract_post_id_from_hoyolab_url(url) is None, f"Expected no post ID for {url!r}"  # type: ignore