This commit is contained in:
Joakim Hellsén 2024-05-22 22:22:08 +02:00
commit e8e5e87c07
No known key found for this signature in database
GPG key ID: D196AE66FEBE1DC9
6 changed files with 64 additions and 5 deletions

View file

@ -0,0 +1,35 @@
"""Scrape https://github.com/rumca-js/RSS-Link-Database for RSS links."""
from pathlib import Path
import orjson
from click import echo
def scrape():
"""Scrape.
Raises:
FileNotFoundError: If the RSS-Link-Database repository is not found.
"""
repository_path = Path("RSS-Link-Database")
if not repository_path.exists():
msg = "RSS-Link-Database repository not found."
raise FileNotFoundError(msg)
rss_links = []
for file in repository_path.glob("*.json"):
echo(f"Scraping {file.name}...")
with file.open("r", encoding="utf-8") as f:
data = orjson.loads(f.read())
for d in data:
if d.get("url"):
rss_links.append(d["url"])
if d.get("link"):
rss_links.append(d["link"])
rss_links = list(set(rss_links))
return "\n".join(rss_links)