mirror of
https://github.com/TheLovinator1/wutheringwaves.git
synced 2025-11-27 20:49:48 +01:00
Optimize MarkdownConverter by creating a single instance at module level
Co-authored-by: TheLovinator1 <4153203+TheLovinator1@users.noreply.github.com>
This commit is contained in:
12
scrape.py
12
scrape.py
@@ -50,6 +50,12 @@ CIRCLED_NUMBERS = {
|
|||||||
"⑩": ("10", re.compile(r"^\s*⑩\s*(.*?)\s*$", re.MULTILINE)),
|
"⑩": ("10", re.compile(r"^\s*⑩\s*(.*?)\s*$", re.MULTILINE)),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Markdown converter instance - reuse instead of creating for each article
|
||||||
|
MARKDOWN_CONVERTER = MarkdownConverter(
|
||||||
|
heading_style="ATX",
|
||||||
|
strip=["pre", "code"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_json(url: str, client: httpx.AsyncClient) -> dict[Any, Any] | None:
|
async def fetch_json(url: str, client: httpx.AsyncClient) -> dict[Any, Any] | None:
|
||||||
@@ -428,11 +434,7 @@ def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
|
|||||||
if not article_content:
|
if not article_content:
|
||||||
article_content = article_title
|
article_content = article_title
|
||||||
|
|
||||||
converter: MarkdownConverter = MarkdownConverter(
|
article_content_converted = str(MARKDOWN_CONVERTER.convert(article_content).strip()) # type: ignore # noqa: PGH003
|
||||||
heading_style="ATX",
|
|
||||||
strip=["pre", "code"],
|
|
||||||
)
|
|
||||||
article_content_converted = str(converter.convert(article_content).strip()) # type: ignore # noqa: PGH003
|
|
||||||
|
|
||||||
if not article_content_converted:
|
if not article_content_converted:
|
||||||
msg: str = f"Article content is empty for article ID: {article_id}"
|
msg: str = f"Article content is empty for article ID: {article_id}"
|
||||||
|
|||||||
Reference in New Issue
Block a user