Keep newlines in Atom feed
This commit is contained in:
7340
articles_all.xml
7340
articles_all.xml
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
10
scrape.py
10
scrape.py
@@ -294,8 +294,12 @@ def strip_unsafe_tags(content: str) -> str:
|
|||||||
str: The cleaned HTML content.
|
str: The cleaned HTML content.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
sanitizer = Sanitizer()
|
sanitizer = Sanitizer({
|
||||||
return sanitizer.sanitize(content) # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
|
"tags": {"a", "br", "b", "strong", "i", "em", "code", "s", "strike", "del", "u"},
|
||||||
|
"empty": {"a", "br"},
|
||||||
|
"separate": {"br"},
|
||||||
|
})
|
||||||
|
return sanitizer.sanitize(content.replace("\n", "<br>")).replace("<br>", "\n") # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
|
||||||
|
|
||||||
|
|
||||||
def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
|
def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
|
||||||
@@ -355,7 +359,7 @@ def generate_atom_feed(articles: list[dict[Any, Any]], file_name: str) -> str:
|
|||||||
<id>{entry_id}</id>
|
<id>{entry_id}</id>
|
||||||
<title>{escape(article_title)}</title>
|
<title>{escape(article_title)}</title>
|
||||||
<link href="{article_url}" rel="alternate" type="text/html"/>
|
<link href="{article_url}" rel="alternate" type="text/html"/>
|
||||||
<content type="html">{escape(article_content)}</content>
|
<content type="html">{escape(article_content.strip())}</content>
|
||||||
{published}
|
{published}
|
||||||
<updated>{updated}</updated>
|
<updated>{updated}</updated>
|
||||||
{category}
|
{category}
|
||||||
|
Reference in New Issue
Block a user