"""Fetch articles from the Wuthering Waves website and saves them locally in JSON format.
|
|
|
|
It retrieves the article menu and individual articles, prettifies the JSON output, and sets file timestamps based on article creation dates.
|
|
""" # noqa: CPY001

import asyncio
import json
import logging
import os
import shutil
import subprocess  # noqa: S404
import sys
import time
from datetime import UTC, datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal, LiteralString

import aiofiles
import httpx
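
# Imports under TYPE_CHECKING are only evaluated by static type checkers, never at runtime.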
if TYPE_CHECKING:
    from collections.abc import Coroutine

logging.basicConfig(
    level=logging.INFO,
    format="%(message)s",
)

logger: logging.Logger = logging.getLogger("wutheringwaves")


async def fetch_json(url: str, client: httpx.AsyncClient) -> dict[Any, Any] | None:
    """Fetch JSON data from a URL.

    Args:
        url (str): The URL to fetch data from.
        client (httpx.AsyncClient): The HTTP client to use for the request.

    Returns:
        dict[Any, Any] | None: The parsed JSON data if successful, None otherwise.

    """
    try:
        response: httpx.Response = await client.get(url)
        response.raise_for_status()
        return response.json()
    except (httpx.HTTPError, json.JSONDecodeError):
        # httpx.HTTPError also covers the HTTPStatusError raised by raise_for_status(),
        # so 4xx/5xx responses return None instead of propagating.
        logger.exception("Error fetching %s:", url)
        return None


async def save_prettified_json(data: dict[Any, Any], filepath: Path) -> bool:
    """Save JSON data to a file with pretty formatting.

    Args:
        data (dict[Any, Any]): The JSON data to save.
        filepath (Path): The path to the file where the data will be saved.

    Returns:
        bool: True if the data was saved successfully, False otherwise.

    """
    try:
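        # aiofiles hands the blocking file write to a worker thread, keeping the event loop free.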
        async with aiofiles.open(filepath, "w", encoding="utf-8") as f:
            await f.write(json.dumps(data, indent=2, ensure_ascii=False))
    except Exception:
        logger.exception("Error saving %s:", filepath)
        return False
    else:
        return True


def set_file_timestamp(filepath: Path, timestamp_str: str) -> bool:
    """Set a file's modification time based on an ISO timestamp string.

    Args:
        filepath (Path): The path to the file.
        timestamp_str (str): The ISO timestamp string.

    Returns:
        bool: True if the timestamp was set successfully, False otherwise.

    """
    try:
        # Parse the timestamp string
        dt: datetime = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)

        # Convert to Unix timestamp
        timestamp: float = dt.timestamp()

        # Set the file's modification time
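        # os.utime takes (atime, mtime); both are set to the article's creation time.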
        os.utime(filepath, (timestamp, timestamp))
    except (ValueError, OSError):
        logger.exception("Error setting timestamp for %s", filepath)
        return False
    else:
        logger.info("Timestamp for %s set to %s", filepath, dt.isoformat())
        return True


def get_file_timestamp(timestamp_str: str) -> float:
    """Convert an ISO timestamp string to a Unix timestamp.

    Args:
        timestamp_str (str): The ISO timestamp string.

    Returns:
        float: The Unix timestamp, or 0.0 if conversion failed.
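
    Example:
        >>> get_file_timestamp("2024-05-01 12:00:00")
        1714564800.0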

    """
    try:
        # Parse the timestamp string
        dt: datetime = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S").replace(tzinfo=UTC)

        # Convert to Unix timestamp
        return dt.timestamp()
    except ValueError:
        logger.warning("Error converting timestamp %s", timestamp_str)
        return 0.0


def commit_file_with_timestamp(filepath: Path) -> bool:  # noqa: PLR0911
    """Commit a file to Git with its modification time as the commit time.

    Args:
        filepath (Path): The path to the file to commit.

    Returns:
        bool: True if the commit was successful, False otherwise.

    """
    # Check in Git history if we already have this file
    git_log_cmd: list[str] = ["git", "log", "--pretty=format:%H", "--follow", str(filepath)]
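    # --follow keeps tracking the file across renames; any output means it already has history.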
    try:
        git_log_output: str = subprocess.check_output(git_log_cmd, text=True).strip()  # noqa: S603
        if git_log_output:
            logger.info("File %s already exists in Git history.", filepath)
            return True
    except subprocess.CalledProcessError:
        logger.exception("Error checking Git history for %s", filepath)
        return False

    try:
        # Get the full path to the Git executable
        git_executable: str | None = shutil.which("git")
        if not git_executable:
            logger.error("Git executable not found.")
            return False

        # Validate the filepath
        if not filepath.is_file():
            logger.error("Invalid file path: %s", filepath)
            return False

        # Get the file's modification time
        timestamp: float = filepath.stat().st_mtime
        git_time: str = datetime.fromtimestamp(timestamp, tz=UTC).strftime("%Y-%m-%dT%H:%M:%S")

        # Stage the file
        subprocess.run([git_executable, "add", str(filepath)], check=True, text=True)  # noqa: S603

        # Commit the file with the modification time as the commit time
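        # Git reads GIT_AUTHOR_DATE and GIT_COMMITTER_DATE from the environment,
        # which lets the commit be backdated without rewriting history.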
        env: dict[str, str] = {
            **os.environ,
            "GIT_AUTHOR_DATE": git_time,
            "GIT_COMMITTER_DATE": git_time,
        }
        subprocess.run(  # noqa: S603
            [git_executable, "commit", "-m", f"Add {filepath.name}"],
            check=True,
            env=env,
            text=True,
        )
    except subprocess.CalledProcessError:
        logger.exception("Subprocess error occurred while committing the file.")
        return False
    except Exception:
        logger.exception("Error committing %s to Git", filepath)
        return False
    else:
        logger.info("Successfully committed %s to Git", filepath)
        return True


def add_articles_to_readme(articles: dict[Any, Any] | None = None) -> None:
    """Add the list of articles to the README.md file."""
    if articles is None:
        logger.warning("No articles to add to README.md")
        return

    readme_file: Path = Path("README.md")
    if not readme_file.is_file():
        logger.error("README.md file not found.")
        return
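
    # Open read/write so the section after "## Articles" can be rebuilt in place.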
    with readme_file.open("r+", encoding="utf-8") as f:
        # Read existing content
        lines: list[str] = f.readlines()

        # Find the "## Articles" section or add it
        articles_section_index: int = -1
        for i, line in enumerate(lines):
            if line.strip() == "## Articles":
                articles_section_index = i
                break

        # Create new content
        new_lines: list[str] = []
        if articles_section_index >= 0:
            new_lines = lines[: articles_section_index + 1]  # Keep everything up to "## Articles"
        else:
            new_lines = lines
            if new_lines and not new_lines[-1].endswith("\n"):
                new_lines.append("\n")
            new_lines.append("## Articles\n")

        # Add articles, newest first
        new_lines.append("\n")  # Add a blank line after the heading
        for article in sorted(articles, key=lambda x: x.get("createTime", ""), reverse=True):
            article_id: str = str(article.get("articleId", ""))
            article_title: str = article.get("articleTitle", "No Title")
            article_url: str = f"https://wutheringwaves.kurogames.com/en/main/news/detail/{article_id}"
            new_lines.append(
                f"- [{article_title}]({article_url}) [[json]](articles/{article_id}.json)\n",
            )

        # Add the articles directory section
        new_lines.append("\n## Articles Directory\n")
        new_lines.append("The articles are saved in the `articles` directory.\n")
        new_lines.append("You can view them [here](articles).\n")

        # Write the updated content
        f.seek(0)
        f.truncate()
        f.writelines(new_lines)

    logger.info("Articles added to README.md")


def batch_process_timestamps(menu_data: dict[Any, Any], output_dir: Path) -> None:
    """Process all timestamps in batch for better performance.

    Args:
        menu_data (dict[Any, Any]): The article menu data containing timestamps.
        output_dir (Path): Directory containing the article files.

    """
    # Extract article IDs and timestamps
    timestamp_map: dict[str, str] = {}
    for item in menu_data:
        article_id = str(item.get("articleId", ""))
        create_time = item.get("createTime")
        if article_id and create_time:
            timestamp_map[article_id] = create_time

    logger.info("Collected %s timestamps from menu data", len(timestamp_map))

    # Check which files need timestamp updates
    files_to_update: list[tuple[Path, str]] = []
    for article_id, create_time in timestamp_map.items():
        file_path: Path = output_dir / f"{article_id}.json"
        if not file_path.exists():
            continue

        expected_timestamp: float = get_file_timestamp(create_time)
        if expected_timestamp == 0.0:
            continue

        actual_timestamp: float = file_path.stat().st_mtime

        # Only update if timestamps don't match (with a small tolerance)
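        # A one-second tolerance absorbs filesystem mtime rounding.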
        if abs(actual_timestamp - expected_timestamp) > 1.0:
            files_to_update.append((file_path, create_time))

    logger.info("Found %s files that need timestamp updates", len(files_to_update))

    # Update timestamps and commit files
    for file_path, create_time in files_to_update:
        logger.info("Setting %s timestamp to %s", file_path, create_time)
        if set_file_timestamp(file_path, create_time):
            if not commit_file_with_timestamp(file_path):
                logger.error("Failed to commit file %s to Git", file_path)
        else:
            logger.error("Failed to update timestamp for %s", file_path)


async def main() -> Literal[1, 0]:
    """Fetch and save articles from the Wuthering Waves website.

    Returns:
        Literal[1, 0]: 1 if an error occurred, 0 otherwise.

    """
    # Setup
    current_time = int(time.time() * 1000)  # Current time in milliseconds
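    # The timestamp is appended as a ?t= query parameter, acting as a cache buster so the CDN serves fresh JSON.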
    base_url = "https://hw-media-cdn-mingchao.kurogame.com/akiwebsite/website2.0/json/G152/en"
    article_menu_url: str = f"{base_url}/ArticleMenu.json?t={current_time}"
    article_base_url: LiteralString = f"{base_url}/article/"
    output_dir = Path("articles")
    output_dir.mkdir(exist_ok=True)

    logger.info("Fetching article menu from %s", article_menu_url)

    async with httpx.AsyncClient(timeout=30.0) as client:
        # Fetch the article menu
        menu_data: dict[Any, Any] | None = await fetch_json(article_menu_url, client)
        if not menu_data:
            logger.error("Error: Fetched ArticleMenu.json is empty")
            return 1

        # Save and prettify the menu JSON
        menu_file: Path = output_dir / "ArticleMenu.json"
        if await save_prettified_json(menu_data, menu_file):
            logger.info("Menu JSON saved and prettified to %s", menu_file)

        # Extract article IDs
        logger.info("Extracting article IDs...")
        article_ids: list[str] = [str(item["articleId"]) for item in menu_data if item.get("articleId")]

        if not article_ids:
            logger.warning("No article IDs found. Please check the JSON structure of ArticleMenu.json.")
            logger.warning("Full menu response for debugging:")
            logger.warning(json.dumps(menu_data, indent=2))
            return 1

        # Get the list of already downloaded article IDs
        existing_files: list[str] = [file.stem for file in output_dir.glob("*.json") if file.stem != "ArticleMenu"]

        # Filter out already downloaded articles
        new_article_ids: list[str] = [article_id for article_id in article_ids if article_id not in existing_files]

        if new_article_ids:
            logger.info("Found %s new articles to download", len(new_article_ids))

            # Queue a download for each new article
            download_tasks: list[Coroutine[Any, Any, dict[Any, Any] | None]] = []
            for article_id in new_article_ids:
                article_url: str = f"{article_base_url}{article_id}.json?t={current_time}"
                logger.info("Downloading article %s from %s", article_id, article_url)
                download_tasks.append(fetch_json(article_url, client))

            # Wait for all downloads to complete
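            # return_exceptions=True keeps one failed download from cancelling the rest;
            # failures surface as exception objects in `results`.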
            results: list[dict[Any, Any] | BaseException | None] = await asyncio.gather(*download_tasks, return_exceptions=True)

            # Process the downloaded articles
            for i, result in enumerate(results):
                article_id: str = new_article_ids[i]
                output_file: Path = output_dir / f"{article_id}.json"

                if isinstance(result, BaseException):
                    logger.error("Error downloading article %s: %s", article_id, result)
                    continue

                if not result:
                    logger.warning("Downloaded article %s is empty or invalid", article_id)
                    continue

                # Save the article JSON
                if isinstance(result, dict) and await save_prettified_json(result, output_file):
                    logger.info("Successfully downloaded and prettified %s", output_file)
        else:
            logger.info("No new articles to download")

        # Process timestamps in batch
        batch_process_timestamps(menu_data, output_dir)

        # Update the README
        add_articles_to_readme(menu_data)

    logger.info("Script finished. Articles are in the '%s' directory.", output_dir)
    return 0


if __name__ == "__main__":
    # Exit with main()'s status code so failures are visible to the shell/CI.
    sys.exit(asyncio.run(main()))