diff --git a/scrape.py b/scrape.py index aae4172..b5aa799 100644 --- a/scrape.py +++ b/scrape.py @@ -33,7 +33,7 @@ SQUARE_BRACKETS_PATTERN = re.compile(r"^\s*\[([^\]]+)\]\s*$", re.MULTILINE) BALL_PATTERN = re.compile(r"●\s*(.*?)\n", re.MULTILINE) REFERENCE_MARK_PATTERN = re.compile(r"^\s*※\s*(\S.*?)\s*$", re.MULTILINE) ESCAPED_STAR_PATTERN = re.compile(r"\\\*(.*)", re.MULTILINE) -NON_BREAKING_SPACE_PATTERN = re.compile(r"[\xa0\u00a0\u2002\u2003\u2009]") # Various nbsp characters +NON_BREAKING_SPACE_PATTERN = re.compile(r"[\xa0\u2002\u2003\u2009]") # Various nbsp characters EMPTY_CODE_BLOCK_PATTERN = re.compile(r"```[ \t]*\n[ \t]*\n```") # Circled number patterns - precompile for better performance @@ -459,7 +459,7 @@ def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str: content = REFERENCE_MARK_PATTERN.sub(r"\n\n*\1*\n\n", content) # Replace circled Unicode numbers with plain numbered text (using precompiled patterns) - for symbol, (number, pattern) in CIRCLED_NUMBERS.items(): + for number, pattern in CIRCLED_NUMBERS.values(): content = pattern.sub(rf"\n\n{number}. \1\n\n", content) content = ESCAPED_STAR_PATTERN.sub(r"* \1", content)