Use HTML instead of markdown

This commit is contained in:
2025-05-16 23:50:07 +02:00
parent 45d36b3bae
commit 9511970cc6
299 changed files with 44321 additions and 18001 deletions

View File

@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any, Literal
import aiofiles
import httpx
import markdown
import mdformat
from bs4 import BeautifulSoup
from markdownify import MarkdownConverter # pyright: ignore[reportMissingTypeStubs]
from markupsafe import Markup, escape
@ -361,7 +363,7 @@ def handle_stars(text: str) -> str:
return "\n\n".join(output)
def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str: # noqa: PLR0914
def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str: # noqa: PLR0914, PLR0915
"""Generate an Atom feed from a list of articles.
Args:
@ -492,24 +494,46 @@ def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
article_category: str = article.get("articleTypeName", "Wuthering Waves")
category: str = f'<category term="{escape(article_category)}"/>' if article_category else ""
html: str = markdown.markdown(
text=article_escaped,
extensions=[
"markdown.extensions.sane_lists",
],
)
atom_entries.append(
f"""
<entry>
<id>{entry_id}</id>
<title>{escape(article_title)}</title>
<link href="{article_url}" rel="alternate" type="text/html"/>
<content type="text">{article_escaped}</content>
{published}
<updated>{updated}</updated>
{category}
<author>
<name>Wuthering Waves</name>
<email>wutheringwaves_ensupport@kurogames.com</email>
<uri>https://wutheringwaves.kurogames.com</uri>
</author>
</entry>""",
<entry>
<id>{entry_id}</id>
<title>{escape(article_title)}</title>
<link href="{article_url}" rel="alternate" type="text/html"/>
<content type="html"><![CDATA[{html}]]></content>
{published}
<updated>{updated}</updated>
{category}
<author>
<name>Wuthering Waves</name>
<email>wutheringwaves_ensupport@kurogames.com</email>
<uri>https://wutheringwaves.kurogames.com</uri>
</author>
</entry>
""",
)
# If HTML not already saved to /html, save it
html_dir: Path = Path("html")
html_dir.mkdir(exist_ok=True)
html_file: Path = html_dir / f"{article_id}.html"
if not html_file.is_file():
with html_file.open("w", encoding="utf-8") as f:
f.write(str(BeautifulSoup(html, "html.parser").prettify()))
logger.info("Saved HTML for article %s to %s", article_id, html_file)
# Set the file timestamp
if not set_file_timestamp(html_file, article_create_time):
logger.error("Failed to set timestamp for %s", html_file)
# Create the complete Atom feed
atom_feed: str = f"""<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
@ -529,7 +553,8 @@ def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
<uri>https://wutheringwaves.kurogames.com</uri>
</author>
{"".join(atom_entries)}
</feed>""" # noqa: E501
</feed>
""" # noqa: E501
return atom_feed