mirror of
https://github.com/TheLovinator1/wutheringwaves.git
synced 2025-11-27 20:49:48 +01:00
Compare commits
8 Commits
47a39bd061
...
copilot/fi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef4bf32318 | ||
|
|
67882d49a9 | ||
|
|
a784807a90 | ||
|
|
0ddb59e727 | ||
|
|
f1924f38ad | ||
|
|
c972592a2f | ||
|
|
3eb76ac54c | ||
|
|
060fb9c1ac |
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1 +1,6 @@
|
|||||||
articles/ArticleMenu.json
|
articles/ArticleMenu.json
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
*.pyd
|
||||||
|
.Python
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ Stay updated with the latest Wuthering Waves news through these RSS feeds:
|
|||||||
|
|
||||||
## Articles
|
## Articles
|
||||||
|
|
||||||
|
- [Tidal Photography: Capture Your Action Highlights of Resonators!](https://wutheringwaves.kurogames.com/en/main/news/detail/3532) [[json]](articles/3532.json)
|
||||||
- [[Freeze Frame: Action Highlights] Combat Photography Event](https://wutheringwaves.kurogames.com/en/main/news/detail/3523) [[json]](articles/3523.json)
|
- [[Freeze Frame: Action Highlights] Combat Photography Event](https://wutheringwaves.kurogames.com/en/main/news/detail/3523) [[json]](articles/3523.json)
|
||||||
- [[Absolute Pulsation] Featured Weapon Convene: Boosted Drop Rate for Blazing Justice](https://wutheringwaves.kurogames.com/en/main/news/detail/3516) [[json]](articles/3516.json)
|
- [[Absolute Pulsation] Featured Weapon Convene: Boosted Drop Rate for Blazing Justice](https://wutheringwaves.kurogames.com/en/main/news/detail/3516) [[json]](articles/3516.json)
|
||||||
- [[Between Light and Shadow] Featured Resonator Convene: Boosted Drop Rate for Zani](https://wutheringwaves.kurogames.com/en/main/news/detail/3509) [[json]](articles/3509.json)
|
- [[Between Light and Shadow] Featured Resonator Convene: Boosted Drop Rate for Zani](https://wutheringwaves.kurogames.com/en/main/news/detail/3509) [[json]](articles/3509.json)
|
||||||
|
|||||||
14
articles/3532.json
generated
Normal file
14
articles/3532.json
generated
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"articleContent": "<div>\n<div>\n<div data-zone-id=\"0\" data-line-index=\"0\" data-line=\"true\">\n<div>\n<div data-zone-id=\"0\" data-line-index=\"0\" data-line=\"true\">Freeze Frame: Action Highlights - Combat Photography Event is now underway!</div>\n</div>\n</div>\n<div data-zone-id=\"0\" data-line-index=\"0\" data-line=\"true\">Capture Resonators' exhilarating moments during combat with your camera!</div>\n<div data-zone-id=\"0\" data-line-index=\"1\" data-line=\"true\"> </div>\n<div data-zone-id=\"0\" data-line-index=\"2\" data-line=\"true\">✦Duration✦</div>\n<div data-zone-id=\"0\" data-line-index=\"3\" data-line=\"true\">From now until 2025-11-19 23:59 (UTC+8)</div>\n<div data-zone-id=\"0\" data-line-index=\"4\" data-line=\"true\"> </div>\n<div data-zone-id=\"0\" data-line-index=\"5\" data-line=\"true\">✦How to Submit✦</div>\n<div data-zone-id=\"0\" data-line-index=\"6\" data-line=\"true\">You can submit your work in the following ways:</div>\n<div data-zone-id=\"0\" data-line-index=\"7\" data-line=\"true\">1. Use the [Settings - Camera] feature in Wuthering Waves or take a screenshot to capture in-game footage. Apply your preferred filter and frame the Resonators' battle moments.</div>\n<div data-zone-id=\"0\" data-line-index=\"8\" data-line=\"true\">2. Join the \"Freeze Frame: Action Highlights\" Combat Photography Event. Choose your desired Resonator and use the Slow Motion Gadget to capture their battle scenes.</div>\n<div data-zone-id=\"0\" data-line-index=\"9\" data-line=\"true\">Post your work with both hashtags #WutheringWaves and #WuWaMoment included and join the event.</div>\n<div data-zone-id=\"0\" data-line-index=\"10\" data-line=\"true\"> </div>\n<div data-zone-id=\"0\" data-line-index=\"11\" data-line=\"true\">✦Rewards✦</div>\n<div data-zone-id=\"0\" data-line-index=\"12\" data-line=\"true\">We'll select winners based on the quality and statistics of the entries. Winners will receive Astrites and cash incentives!</div>\n<div data-zone-id=\"0\" data-line-index=\"13\" data-line=\"true\"> </div>\n<div data-zone-id=\"0\" data-line-index=\"14\" data-line=\"true\">Check the image for more detailed info and proceed to submit: https://forms.gle/8foXGMp3p9pzj1Tb9</div>\n<div data-zone-id=\"0\" data-line-index=\"14\" data-line=\"true\"> </div>\n<div data-zone-id=\"0\" data-line-index=\"14\" data-line=\"true\"><img style=\"display: block; margin-left: auto; margin-right: auto;\" src=\"https://hw-media-cdn-mingchao.kurogame.com/object/1761667200000/e6ytovb1kgx824jkwo-1761735233381.jpg\" width=\"800\" data-media-source=\"production_G152_en\" data-desc=\"succ\"></div>\n<div data-zone-id=\"0\" data-line-index=\"14\" data-line=\"true\"> </div>\n<div data-zone-id=\"0\" data-line-index=\"14\" data-line=\"true\"> </div>\n</div>\n</div>",
|
||||||
|
"articleId": 3532,
|
||||||
|
"articleTitle": "Tidal Photography: Capture Your Action Highlights of Resonators!",
|
||||||
|
"articleType": 58,
|
||||||
|
"articleTypeName": "Notice",
|
||||||
|
"endTime": "2099-01-01 00:00:00",
|
||||||
|
"gameId": "G152-en",
|
||||||
|
"startTime": "2025-10-30 12:00:00",
|
||||||
|
"articleDesc": "",
|
||||||
|
"createTime": "2025-10-29 18:55:04",
|
||||||
|
"suggestCover": "",
|
||||||
|
"top": 0
|
||||||
|
}
|
||||||
35
articles_all.xml
generated
35
articles_all.xml
generated
@@ -4,7 +4,7 @@
|
|||||||
<link href="https://wutheringwaves.kurogames.com/en/main/news/" rel="alternate" type="text/html"/>
|
<link href="https://wutheringwaves.kurogames.com/en/main/news/" rel="alternate" type="text/html"/>
|
||||||
<link href="https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/articles_all.xml" rel="self" type="application/atom+xml"/>
|
<link href="https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/articles_all.xml" rel="self" type="application/atom+xml"/>
|
||||||
<id>urn:wutheringwaves:feed</id>
|
<id>urn:wutheringwaves:feed</id>
|
||||||
<updated>2025-10-29T16:18:48+00:00</updated>
|
<updated>2025-10-29T18:55:04+00:00</updated>
|
||||||
<subtitle>Latest articles from Wuthering Waves</subtitle>
|
<subtitle>Latest articles from Wuthering Waves</subtitle>
|
||||||
<icon>https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/logo.png</icon>
|
<icon>https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/logo.png</icon>
|
||||||
<logo>https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/logo.png</logo>
|
<logo>https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/logo.png</logo>
|
||||||
@@ -16,6 +16,39 @@
|
|||||||
<uri>https://wutheringwaves.kurogames.com</uri>
|
<uri>https://wutheringwaves.kurogames.com</uri>
|
||||||
</author>
|
</author>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<id>urn:article:3532</id>
|
||||||
|
<title>Tidal Photography: Capture Your Action Highlights of Resonators!</title>
|
||||||
|
<link href="https://wutheringwaves.kurogames.com/en/main/news/detail/3532" rel="alternate" type="text/html"/>
|
||||||
|
<content type="html"><![CDATA[<p>Freeze Frame: Action Highlights - Combat Photography Event is now underway!</p>
|
||||||
|
<p>Capture Resonators' exhilarating moments during combat with your camera!</p>
|
||||||
|
<h1>Duration</h1>
|
||||||
|
<p>From now until 2025-11-19 23:59 (UTC+8)</p>
|
||||||
|
<h1>How to Submit</h1>
|
||||||
|
<p>You can submit your work in the following ways:</p>
|
||||||
|
<ol>
|
||||||
|
<li>
|
||||||
|
<p>Use the [Settings - Camera] feature in Wuthering Waves or take a screenshot to capture in-game footage. Apply your preferred filter and frame the Resonators' battle moments.</p>
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
<p>Join the "Freeze Frame: Action Highlights" Combat Photography Event. Choose your desired Resonator and use the Slow Motion Gadget to capture their battle scenes.</p>
|
||||||
|
</li>
|
||||||
|
</ol>
|
||||||
|
<p>Post your work with both hashtags #WutheringWaves and #WuWaMoment included and join the event.</p>
|
||||||
|
<h1>Rewards</h1>
|
||||||
|
<p>We'll select winners based on the quality and statistics of the entries. Winners will receive Astrites and cash incentives!</p>
|
||||||
|
<p>Check the image for more detailed info and proceed to submit: https://forms.gle/8foXGMp3p9pzj1Tb9</p>
|
||||||
|
<p><img alt="" src="https://hw-media-cdn-mingchao.kurogame.com/object/1761667200000/e6ytovb1kgx824jkwo-1761735233381.jpg" /></p>]]></content>
|
||||||
|
<published>2025-10-29T18:55:04+00:00</published>
|
||||||
|
<updated>2025-10-29T18:55:04+00:00</updated>
|
||||||
|
<category term="Notice"/>
|
||||||
|
<author>
|
||||||
|
<name>Wuthering Waves</name>
|
||||||
|
<email>wutheringwaves_ensupport@kurogames.com</email>
|
||||||
|
<uri>https://wutheringwaves.kurogames.com</uri>
|
||||||
|
</author>
|
||||||
|
</entry>
|
||||||
|
|
||||||
<entry>
|
<entry>
|
||||||
<id>urn:article:3523</id>
|
<id>urn:article:3523</id>
|
||||||
<title>[Freeze Frame: Action Highlights] Combat Photography Event</title>
|
<title>[Freeze Frame: Action Highlights] Combat Photography Event</title>
|
||||||
|
|||||||
74
articles_latest.xml
generated
74
articles_latest.xml
generated
@@ -4,7 +4,7 @@
|
|||||||
<link href="https://wutheringwaves.kurogames.com/en/main/news/" rel="alternate" type="text/html"/>
|
<link href="https://wutheringwaves.kurogames.com/en/main/news/" rel="alternate" type="text/html"/>
|
||||||
<link href="https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/articles_latest.xml" rel="self" type="application/atom+xml"/>
|
<link href="https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/articles_latest.xml" rel="self" type="application/atom+xml"/>
|
||||||
<id>urn:wutheringwaves:feed</id>
|
<id>urn:wutheringwaves:feed</id>
|
||||||
<updated>2025-10-29T16:18:48+00:00</updated>
|
<updated>2025-10-29T18:55:04+00:00</updated>
|
||||||
<subtitle>Latest articles from Wuthering Waves</subtitle>
|
<subtitle>Latest articles from Wuthering Waves</subtitle>
|
||||||
<icon>https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/logo.png</icon>
|
<icon>https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/logo.png</icon>
|
||||||
<logo>https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/logo.png</logo>
|
<logo>https://raw.githubusercontent.com/TheLovinator1/wutheringwaves/refs/heads/master/logo.png</logo>
|
||||||
@@ -16,6 +16,39 @@
|
|||||||
<uri>https://wutheringwaves.kurogames.com</uri>
|
<uri>https://wutheringwaves.kurogames.com</uri>
|
||||||
</author>
|
</author>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<id>urn:article:3532</id>
|
||||||
|
<title>Tidal Photography: Capture Your Action Highlights of Resonators!</title>
|
||||||
|
<link href="https://wutheringwaves.kurogames.com/en/main/news/detail/3532" rel="alternate" type="text/html"/>
|
||||||
|
<content type="html"><![CDATA[<p>Freeze Frame: Action Highlights - Combat Photography Event is now underway!</p>
|
||||||
|
<p>Capture Resonators' exhilarating moments during combat with your camera!</p>
|
||||||
|
<h1>Duration</h1>
|
||||||
|
<p>From now until 2025-11-19 23:59 (UTC+8)</p>
|
||||||
|
<h1>How to Submit</h1>
|
||||||
|
<p>You can submit your work in the following ways:</p>
|
||||||
|
<ol>
|
||||||
|
<li>
|
||||||
|
<p>Use the [Settings - Camera] feature in Wuthering Waves or take a screenshot to capture in-game footage. Apply your preferred filter and frame the Resonators' battle moments.</p>
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
<p>Join the "Freeze Frame: Action Highlights" Combat Photography Event. Choose your desired Resonator and use the Slow Motion Gadget to capture their battle scenes.</p>
|
||||||
|
</li>
|
||||||
|
</ol>
|
||||||
|
<p>Post your work with both hashtags #WutheringWaves and #WuWaMoment included and join the event.</p>
|
||||||
|
<h1>Rewards</h1>
|
||||||
|
<p>We'll select winners based on the quality and statistics of the entries. Winners will receive Astrites and cash incentives!</p>
|
||||||
|
<p>Check the image for more detailed info and proceed to submit: https://forms.gle/8foXGMp3p9pzj1Tb9</p>
|
||||||
|
<p><img alt="" src="https://hw-media-cdn-mingchao.kurogame.com/object/1761667200000/e6ytovb1kgx824jkwo-1761735233381.jpg" /></p>]]></content>
|
||||||
|
<published>2025-10-29T18:55:04+00:00</published>
|
||||||
|
<updated>2025-10-29T18:55:04+00:00</updated>
|
||||||
|
<category term="Notice"/>
|
||||||
|
<author>
|
||||||
|
<name>Wuthering Waves</name>
|
||||||
|
<email>wutheringwaves_ensupport@kurogames.com</email>
|
||||||
|
<uri>https://wutheringwaves.kurogames.com</uri>
|
||||||
|
</author>
|
||||||
|
</entry>
|
||||||
|
|
||||||
<entry>
|
<entry>
|
||||||
<id>urn:article:3523</id>
|
<id>urn:article:3523</id>
|
||||||
<title>[Freeze Frame: Action Highlights] Combat Photography Event</title>
|
<title>[Freeze Frame: Action Highlights] Combat Photography Event</title>
|
||||||
@@ -1322,43 +1355,4 @@
|
|||||||
</author>
|
</author>
|
||||||
</entry>
|
</entry>
|
||||||
|
|
||||||
<entry>
|
|
||||||
<id>urn:article:3387</id>
|
|
||||||
<title>[Absolute Pulsation] Featured Weapon Convene: Boosted Drop Rate for Lux & Umbra</title>
|
|
||||||
<link href="https://wutheringwaves.kurogames.com/en/main/news/detail/3387" rel="alternate" type="text/html"/>
|
|
||||||
<content type="html"><![CDATA[<p><img alt="" src="https://hw-media-cdn-mingchao.kurogame.com/object/1759680000000/twbimhh2yaz2tgy8ho-1759735717268.jpg" /></p>
|
|
||||||
<p>During the event, 5-Star Weapon: Lux & Umbra, 4-Star Weapons: Novaburst, Helios Cleaver, and Fusion Accretion receive boosted drop rates!</p>
|
|
||||||
<h1>Duration</h1>
|
|
||||||
<p>Version 2.7 update - 2025-10-30 09:59 (server time)</p>
|
|
||||||
<h1>Eligibility</h1>
|
|
||||||
<p>Reach Union Level 8 or complete the combat trial in ""First Resonance"" and activate the Convene function.</p>
|
|
||||||
<h1>Convene Rules</h1>
|
|
||||||
<ul>
|
|
||||||
<li>
|
|
||||||
<p>[Absolute Pulsation] is a Featured Weapon Convene event banner.</p>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<p>Use Forging Tide to Convene. You are guaranteed at least one 4-Star or higher rarity Resonator or Weapon every 10 attempts.</p>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<p>The guarantee count is shared among all Featured Weapon Convene events and is carried over until a 5-Star Weapon is obtained.</p>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<p>The 5-Star Weapon you Convene is guaranteed to be Lux & Umbra.</p>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<p>Featured Weapon Lux & Umbra is exclusive to the Featured Weapon Convene and will not be available by other means once the event ends.</p>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<p><em>For more details, please refer to the in-game Convene page.</em></p>]]></content>
|
|
||||||
<published>2025-10-06T15:28:59+00:00</published>
|
|
||||||
<updated>2025-10-06T15:28:59+00:00</updated>
|
|
||||||
<category term="Notice"/>
|
|
||||||
<author>
|
|
||||||
<name>Wuthering Waves</name>
|
|
||||||
<email>wutheringwaves_ensupport@kurogames.com</email>
|
|
||||||
<uri>https://wutheringwaves.kurogames.com</uri>
|
|
||||||
</author>
|
|
||||||
</entry>
|
|
||||||
|
|
||||||
</feed>
|
</feed>
|
||||||
|
|||||||
45
html/3532.html
generated
Normal file
45
html/3532.html
generated
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
<p>
|
||||||
|
Freeze Frame: Action Highlights - Combat Photography Event is now underway!
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Capture Resonators' exhilarating moments during combat with your camera!
|
||||||
|
</p>
|
||||||
|
<h1>
|
||||||
|
Duration
|
||||||
|
</h1>
|
||||||
|
<p>
|
||||||
|
From now until 2025-11-19 23:59 (UTC+8)
|
||||||
|
</p>
|
||||||
|
<h1>
|
||||||
|
How to Submit
|
||||||
|
</h1>
|
||||||
|
<p>
|
||||||
|
You can submit your work in the following ways:
|
||||||
|
</p>
|
||||||
|
<ol>
|
||||||
|
<li>
|
||||||
|
<p>
|
||||||
|
Use the [Settings - Camera] feature in Wuthering Waves or take a screenshot to capture in-game footage. Apply your preferred filter and frame the Resonators' battle moments.
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
<p>
|
||||||
|
Join the "Freeze Frame: Action Highlights" Combat Photography Event. Choose your desired Resonator and use the Slow Motion Gadget to capture their battle scenes.
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
|
</ol>
|
||||||
|
<p>
|
||||||
|
Post your work with both hashtags #WutheringWaves and #WuWaMoment included and join the event.
|
||||||
|
</p>
|
||||||
|
<h1>
|
||||||
|
Rewards
|
||||||
|
</h1>
|
||||||
|
<p>
|
||||||
|
We'll select winners based on the quality and statistics of the entries. Winners will receive Astrites and cash incentives!
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Check the image for more detailed info and proceed to submit: https://forms.gle/8foXGMp3p9pzj1Tb9
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
<img alt="" src="https://hw-media-cdn-mingchao.kurogame.com/object/1761667200000/e6ytovb1kgx824jkwo-1761735233381.jpg"/>
|
||||||
|
</p>
|
||||||
176
scrape.py
176
scrape.py
@@ -14,7 +14,6 @@ import aiofiles
|
|||||||
import httpx
|
import httpx
|
||||||
import markdown
|
import markdown
|
||||||
import mdformat
|
import mdformat
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from markdownify import MarkdownConverter # pyright: ignore[reportMissingTypeStubs]
|
from markdownify import MarkdownConverter # pyright: ignore[reportMissingTypeStubs]
|
||||||
from markupsafe import Markup, escape
|
from markupsafe import Markup, escape
|
||||||
|
|
||||||
@@ -28,6 +27,36 @@ logging.basicConfig(
|
|||||||
|
|
||||||
logger: logging.Logger = logging.getLogger("wutheringwaves")
|
logger: logging.Logger = logging.getLogger("wutheringwaves")
|
||||||
|
|
||||||
|
# Compile regex patterns for better performance
|
||||||
|
DISCORD_LINK_PATTERN = re.compile(r'\[([^\]]+)\]\((https?://[^\s)]+) "\2"\)')
|
||||||
|
SQUARE_BRACKETS_PATTERN = re.compile(r"^\s*\[([^\]]+)\]\s*$", re.MULTILINE)
|
||||||
|
BALL_PATTERN = re.compile(r"●\s*(.*?)\n", re.MULTILINE)
|
||||||
|
REFERENCE_MARK_PATTERN = re.compile(r"^\s*※\s*(\S.*?)\s*$", re.MULTILINE)
|
||||||
|
ESCAPED_STAR_PATTERN = re.compile(r"\\\*(.*)", re.MULTILINE)
|
||||||
|
NON_BREAKING_SPACE_PATTERN = re.compile(r"[\xa0\u2002\u2003\u2009]") # Various nbsp characters
|
||||||
|
EMPTY_CODE_BLOCK_PATTERN = re.compile(r"```[ \t]*\n[ \t]*\n```")
|
||||||
|
|
||||||
|
# Circled number patterns - precompile for better performance
|
||||||
|
CIRCLED_NUMBERS = {
|
||||||
|
"①": ("1", re.compile(r"^\s*①\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"②": ("2", re.compile(r"^\s*②\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"③": ("3", re.compile(r"^\s*③\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"④": ("4", re.compile(r"^\s*④\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"⑤": ("5", re.compile(r"^\s*⑤\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"⑥": ("6", re.compile(r"^\s*⑥\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"⑦": ("7", re.compile(r"^\s*⑦\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"⑧": ("8", re.compile(r"^\s*⑧\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"⑨": ("9", re.compile(r"^\s*⑨\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
"⑩": ("10", re.compile(r"^\s*⑩\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Markdown converter instance - reuse instead of creating for each article
|
||||||
|
MARKDOWN_CONVERTER = MarkdownConverter(
|
||||||
|
heading_style="ATX",
|
||||||
|
strip=["pre", "code"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_json(url: str, client: httpx.AsyncClient) -> dict[Any, Any] | None:
|
async def fetch_json(url: str, client: httpx.AsyncClient) -> dict[Any, Any] | None:
|
||||||
"""Fetch JSON data from a URL.
|
"""Fetch JSON data from a URL.
|
||||||
@@ -325,13 +354,7 @@ def format_discord_links(md: str) -> str:
|
|||||||
|
|
||||||
# Before: [Link](https://example.com "Link")
|
# Before: [Link](https://example.com "Link")
|
||||||
# After: [Link](https://example.com)
|
# After: [Link](https://example.com)
|
||||||
formatted_links_md: str = re.sub(
|
return DISCORD_LINK_PATTERN.sub(repl, md)
|
||||||
pattern=r'\[([^\]]+)\]\((https?://[^\s)]+) "\2"\)',
|
|
||||||
repl=repl,
|
|
||||||
string=md,
|
|
||||||
)
|
|
||||||
|
|
||||||
return formatted_links_md
|
|
||||||
|
|
||||||
|
|
||||||
def handle_stars(text: str) -> str:
|
def handle_stars(text: str) -> str:
|
||||||
@@ -411,91 +434,38 @@ def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
|
|||||||
if not article_content:
|
if not article_content:
|
||||||
article_content = article_title
|
article_content = article_title
|
||||||
|
|
||||||
converter: MarkdownConverter = MarkdownConverter(
|
article_content_converted = str(MARKDOWN_CONVERTER.convert(article_content).strip()) # type: ignore # noqa: PGH003
|
||||||
heading_style="ATX",
|
|
||||||
strip=["pre", "code"],
|
|
||||||
)
|
|
||||||
article_content_converted = str(converter.convert(article_content).strip()) # type: ignore # noqa: PGH003
|
|
||||||
|
|
||||||
if not article_content_converted:
|
if not article_content_converted:
|
||||||
msg: str = f"Article content is empty for article ID: {article_id}"
|
msg: str = f"Article content is empty for article ID: {article_id}"
|
||||||
logger.warning(msg)
|
logger.warning(msg)
|
||||||
article_content_converted = "No content available"
|
article_content_converted = "No content available"
|
||||||
|
|
||||||
# Remove non-breaking spaces
|
# Combine non-breaking space replacements in one pass
|
||||||
xa0_removed: str = re.sub(
|
content = NON_BREAKING_SPACE_PATTERN.sub(" ", article_content_converted)
|
||||||
r"\xa0", " ", article_content_converted
|
|
||||||
) # Replace non-breaking spaces with regular spaces
|
|
||||||
|
|
||||||
# Replace non-breaking spaces with regular spaces
|
# Remove empty code blocks
|
||||||
non_breaking_space_removed: str = xa0_removed.replace(
|
content = EMPTY_CODE_BLOCK_PATTERN.sub("", content)
|
||||||
" ", # noqa: RUF001
|
|
||||||
" ",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Remove code blocks that has only spaces and newlines inside them
|
|
||||||
empty_code_block_removed: str = re.sub(
|
|
||||||
pattern=r"```[ \t]*\n[ \t]*\n```",
|
|
||||||
repl="",
|
|
||||||
string=non_breaking_space_removed, # type: ignore # noqa: PGH003
|
|
||||||
)
|
|
||||||
|
|
||||||
# [How to Update] should be # How to Update
|
# [How to Update] should be # How to Update
|
||||||
square_brackets_converted: str = re.sub(
|
content = SQUARE_BRACKETS_PATTERN.sub(r"# \1", content)
|
||||||
pattern=r"^\s*\[([^\]]+)\]\s*$",
|
|
||||||
repl=r"# \1",
|
|
||||||
string=empty_code_block_removed, # type: ignore # noqa: PGH003
|
|
||||||
flags=re.MULTILINE,
|
|
||||||
)
|
|
||||||
|
|
||||||
stars_converted: str = handle_stars(square_brackets_converted)
|
content = handle_stars(content)
|
||||||
|
|
||||||
# If `● Word` is in the content, replace it `## Word` instead with regex
|
# If `● Word` is in the content, replace it `## Word` instead
|
||||||
ball_converted: str = re.sub(
|
content = BALL_PATTERN.sub(r"\n\n## \1\n\n", content)
|
||||||
pattern=r"●\s*(.*?)\n",
|
|
||||||
repl=r"\n\n## \1\n\n",
|
|
||||||
string=stars_converted,
|
|
||||||
flags=re.MULTILINE,
|
|
||||||
)
|
|
||||||
|
|
||||||
# If `※ Word` is in the content, replace it `* word * ` instead with regex
|
# If `※ Word` is in the content, replace it `* word * ` instead
|
||||||
reference_mark_converted: str = re.sub(
|
content = REFERENCE_MARK_PATTERN.sub(r"\n\n*\1*\n\n", content)
|
||||||
pattern=r"^\s*※\s*(\S.*?)\s*$",
|
|
||||||
repl=r"\n\n*\1*\n\n",
|
|
||||||
string=ball_converted,
|
|
||||||
flags=re.MULTILINE,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Replace circled Unicode numbers (①-⑳) with plain numbered text (e.g., "1. ", "2. ", ..., "20. ")
|
# Replace circled Unicode numbers with plain numbered text (using precompiled patterns)
|
||||||
number_symbol: dict[str, str] = {
|
for number, pattern in CIRCLED_NUMBERS.values():
|
||||||
"①": "1",
|
content = pattern.sub(rf"\n\n{number}. \1\n\n", content)
|
||||||
"②": "2",
|
|
||||||
"③": "3",
|
|
||||||
"④": "4",
|
|
||||||
"⑤": "5",
|
|
||||||
"⑥": "6",
|
|
||||||
"⑦": "7",
|
|
||||||
"⑧": "8",
|
|
||||||
"⑨": "9",
|
|
||||||
"⑩": "10",
|
|
||||||
}
|
|
||||||
for symbol, number in number_symbol.items():
|
|
||||||
reference_mark_converted = re.sub(
|
|
||||||
pattern=rf"^\s*{re.escape(symbol)}\s*(.*?)\s*$",
|
|
||||||
repl=rf"\n\n{number}. \1\n\n",
|
|
||||||
string=reference_mark_converted,
|
|
||||||
flags=re.MULTILINE,
|
|
||||||
)
|
|
||||||
|
|
||||||
space_before_star_added: str = re.sub(
|
content = ESCAPED_STAR_PATTERN.sub(r"* \1", content)
|
||||||
pattern=r"\\\*(.*)",
|
|
||||||
repl=r"* \1",
|
|
||||||
string=reference_mark_converted,
|
|
||||||
flags=re.MULTILINE,
|
|
||||||
)
|
|
||||||
|
|
||||||
markdown_formatted: str = mdformat.text( # type: ignore # noqa: PGH003
|
markdown_formatted: str = mdformat.text( # type: ignore # noqa: PGH003
|
||||||
space_before_star_added,
|
content,
|
||||||
options={
|
options={
|
||||||
"number": True, # Allow 1., 2., 3. numbering
|
"number": True, # Allow 1., 2., 3. numbering
|
||||||
},
|
},
|
||||||
@@ -556,7 +526,7 @@ def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
|
|||||||
html_file: Path = html_dir / f"{article_id}.html"
|
html_file: Path = html_dir / f"{article_id}.html"
|
||||||
if not html_file.is_file():
|
if not html_file.is_file():
|
||||||
with html_file.open("w", encoding="utf-8") as f:
|
with html_file.open("w", encoding="utf-8") as f:
|
||||||
f.write(str(BeautifulSoup(html, "html.parser").prettify()))
|
f.write(html)
|
||||||
logger.info("Saved HTML for article %s to %s", article_id, html_file)
|
logger.info("Saved HTML for article %s to %s", article_id, html_file)
|
||||||
|
|
||||||
# Set the file timestamp
|
# Set the file timestamp
|
||||||
@@ -588,7 +558,30 @@ def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
|
|||||||
return atom_feed
|
return atom_feed
|
||||||
|
|
||||||
|
|
||||||
def create_atom_feeds(output_dir: Path) -> None:
|
def load_all_articles(output_dir: Path) -> list[dict[Any, Any]]:
|
||||||
|
"""Load all article JSON files from the output directory.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
output_dir (Path): The directory containing article JSON files.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list[dict[Any, Any]]: List of article data dictionaries.
|
||||||
|
"""
|
||||||
|
articles: list[dict[Any, Any]] = []
|
||||||
|
for file in output_dir.glob("*.json"):
|
||||||
|
if file.stem == "ArticleMenu":
|
||||||
|
continue
|
||||||
|
with file.open("r", encoding="utf-8") as f:
|
||||||
|
try:
|
||||||
|
article_data: dict[Any, Any] = json.load(f)
|
||||||
|
articles.append(article_data)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.exception("Error decoding JSON from %s", file)
|
||||||
|
continue
|
||||||
|
return articles
|
||||||
|
|
||||||
|
|
||||||
|
def create_atom_feeds(articles: list[dict[Any, Any]], output_dir: Path) -> None:
|
||||||
"""Create Atom feeds for the articles.
|
"""Create Atom feeds for the articles.
|
||||||
|
|
||||||
Current feeds are:
|
Current feeds are:
|
||||||
@@ -596,28 +589,16 @@ def create_atom_feeds(output_dir: Path) -> None:
|
|||||||
- All articles
|
- All articles
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
articles (list[dict[Any, Any]]): List of article data.
|
||||||
output_dir (Path): The directory to save the RSS feed files.
|
output_dir (Path): The directory to save the RSS feed files.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
menu_data: list[dict[Any, Any]] = []
|
if not articles:
|
||||||
# Load data from all the articles
|
logger.error("Can't create Atom feeds, no articles provided")
|
||||||
for file in output_dir.glob("*.json"):
|
|
||||||
if file.stem == "ArticleMenu":
|
|
||||||
continue
|
|
||||||
with file.open("r", encoding="utf-8") as f:
|
|
||||||
try:
|
|
||||||
article_data: dict[Any, Any] = json.load(f)
|
|
||||||
menu_data.append(article_data)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.exception("Error decoding JSON from %s", file)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not menu_data:
|
|
||||||
logger.error("Can't create Atom feeds, no articles found in %s", output_dir)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
articles_sorted: list[dict[Any, Any]] = sorted(
|
articles_sorted: list[dict[Any, Any]] = sorted(
|
||||||
menu_data,
|
articles,
|
||||||
key=lambda x: get_file_timestamp(x.get("createTime", "")),
|
key=lambda x: get_file_timestamp(x.get("createTime", "")),
|
||||||
reverse=True,
|
reverse=True,
|
||||||
)
|
)
|
||||||
@@ -803,9 +784,12 @@ async def main() -> Literal[1, 0]:
|
|||||||
else:
|
else:
|
||||||
logger.info("No new articles to download")
|
logger.info("No new articles to download")
|
||||||
|
|
||||||
|
# Load all articles once for efficient processing
|
||||||
|
all_articles = load_all_articles(output_dir)
|
||||||
|
|
||||||
add_data_to_articles(menu_data, output_dir)
|
add_data_to_articles(menu_data, output_dir)
|
||||||
add_articles_to_readme(menu_data)
|
add_articles_to_readme(menu_data)
|
||||||
create_atom_feeds(output_dir)
|
create_atom_feeds(all_articles, output_dir)
|
||||||
batch_process_timestamps(menu_data, output_dir)
|
batch_process_timestamps(menu_data, output_dir)
|
||||||
|
|
||||||
logger.info("Script finished. Articles are in the '%s' directory.", output_dir)
|
logger.info("Script finished. Articles are in the '%s' directory.", output_dir)
|
||||||
|
|||||||
Reference in New Issue
Block a user