All checks were successful
Deploy to Server / deploy (push) Successful in 18s
121 lines
4 KiB
Python
121 lines
4 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING
|
|
|
|
from django.core.management.base import BaseCommand
|
|
from django.core.management.base import CommandError
|
|
from index_now import IndexNowAuthentication
|
|
from index_now import submit_urls_to_index_now
|
|
from sitemap_parser import SiteMapParser
|
|
|
|
if TYPE_CHECKING:
|
|
from argparse import ArgumentParser
|
|
|
|
from sitemap_parser import SitemapIndex
|
|
from sitemap_parser import UrlSet
|
|
|
|
|
|
def get_urls_from_sitemap(sitemap_url: str, list_of_urls: list[str]) -> list[str]:
    """Recursively parse a sitemap URL and extract all URLs.

    URLs are appended to ``list_of_urls`` in place, and that same list object
    is returned, so callers may use either the argument or the return value.

    Args:
        sitemap_url: The URL of the sitemap to parse.
        list_of_urls: A list to accumulate the extracted URLs.

    Returns:
        A list of URLs extracted from the sitemap and any nested sitemaps.
    """
    parser = SiteMapParser(source=sitemap_url)

    if parser.has_sitemaps():
        # Sitemap index: descend into every child sitemap that has a location.
        sitemaps: SitemapIndex = parser.get_sitemaps()
        for sitemap in sitemaps:
            if not sitemap.loc:
                continue

            # The recursive call appends straight into the shared accumulator
            # and returns that very list. Its result must therefore NOT be fed
            # back into ``extend``: that would append the list to itself and
            # duplicate every URL collected so far for each nested sitemap.
            get_urls_from_sitemap(
                sitemap_url=sitemap.loc,
                list_of_urls=list_of_urls,
            )

    elif parser.has_urls():
        # Plain URL set: collect every entry that actually carries a location.
        urls: UrlSet = parser.get_urls()
        list_of_urls.extend(url.loc for url in urls if url.loc)

    return list_of_urls
|
|
|
|
|
|
def get_chucked_urls(urls: list[str], chunk_size: int = 9999) -> list[list[str]]:
    """Split a list of URLs into smaller chunks.

    Args:
        urls: The list of URLs to split.
        chunk_size: The maximum number of URLs in each chunk. Must be at least 1.

    Returns:
        A list of URL chunks, where each chunk is a list of URLs. The chunks
        preserve the input order; an empty input yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # A negative step would make ``range`` empty and silently drop every URL,
    # and a zero step only fails with an opaque ``range()`` error, so reject
    # both up front with a clear message.
    if chunk_size < 1:
        msg = f"chunk_size must be a positive integer, got {chunk_size}"
        raise ValueError(msg)

    return [urls[i : i + chunk_size] for i in range(0, len(urls), chunk_size)]
|
|
|
|
|
|
class Command(BaseCommand):
    """Submit sitemap URLs to the IndexNow API.

    This command is useful when the site sitemap has changed and you want to
    request that search engines re-index the updated URLs.
    """

    help = "Submit sitemap to the IndexNow API."

    # Status codes treated as a successful submission.
    # NOTE(review): the IndexNow protocol documents 200 (OK) and 202 (accepted,
    # key validation pending) as success - confirm against the protocol docs.
    _SUCCESS_STATUS_CODES = frozenset({200, 202})

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Register command-line arguments."""
        parser.add_argument(
            "--api-key-location",
            default="",
            help="URL where the IndexNow API key file can be downloaded.",
        )

    def handle(self, **options: str | bool) -> None:
        """Execute the command.

        Collects every URL from the site sitemap, removes duplicates, and
        submits the URLs to IndexNow in chunks.

        Args:
            **options: Parsed command-line options; ``api_key_location`` is
                the only one read here.

        Raises:
            CommandError: When the API key cannot be derived from
                ``--api-key-location``, or when the submission to IndexNow
                fails (an exception is raised or a non-success status code is
                returned).
        """
        api_key_location: str = str(options["api_key_location"])
        sitemap: str = "https://ttvdrops.lovinator.space/sitemap.xml"

        # The key is the file name of the key file without its extension:
        # https://lovinator.space/{api_key}.txt
        api_key: str = (
            api_key_location.removesuffix("/").rsplit("/", 1)[-1].removesuffix(".txt")
        )
        if not api_key:
            msg = "API key could not be extracted from the provided URL."
            raise CommandError(msg)

        self.stdout.write(f"Submitting sitemap to IndexNow with API key: {api_key}")

        auth = IndexNowAuthentication(
            host="ttvdrops.lovinator.space",
            api_key=api_key,
            api_key_location=api_key_location,
        )

        # ``dict.fromkeys`` drops duplicate URLs while keeping first-seen
        # order, so the same URL is never submitted twice in one run.
        urls: list[str] = list(
            dict.fromkeys(get_urls_from_sitemap(sitemap_url=sitemap, list_of_urls=[])),
        )
        if not urls:
            # Nothing was submitted, so do not report a (fake) response code.
            self.stdout.write(
                self.style.WARNING("No URLs found in the sitemap; nothing to submit."),
            )
            return

        chunked_urls: list[list[str]] = get_chucked_urls(urls=urls)
        for chunk in chunked_urls:
            self.stdout.write(f"Submitting chunk of {len(chunk)} URLs to IndexNow...")
            try:
                status_code: int = submit_urls_to_index_now(
                    authentication=auth,
                    urls=chunk,
                )
            except Exception as exc:
                msg = f"Error submitting sitemap(s) to IndexNow: {exc}"
                raise CommandError(msg) from exc

            # A returned status code is not proof of success: surface rejected
            # submissions as a command failure instead of printing them as
            # SUCCESS.
            if status_code not in self._SUCCESS_STATUS_CODES:
                msg = (
                    "Error submitting sitemap(s) to IndexNow: "
                    f"unexpected response status {status_code}"
                )
                raise CommandError(msg)

            self.stdout.write(self.style.SUCCESS(f"IndexNow response: {status_code}"))

        self.stdout.write(
            self.style.SUCCESS(
                f"Submitted {len(urls)} URLs to IndexNow in {len(chunked_urls)} chunk(s).",
            ),
        )
|