Files
AzerothCore-RealmMaster/scripts/python/update_module_manifest.py
2025-12-27 15:30:59 -05:00

393 lines
13 KiB
Python
Executable File

#!/usr/bin/env python3
"""Generate or update config/module-manifest.json from GitHub topics.
The script queries the GitHub Search API for repositories tagged with
AzerothCore-specific topics (for example ``azerothcore-module`` or
``azerothcore-lua``) and merges the discovered projects into the existing
module manifest. It intentionally keeps all user-defined fields intact so the
script can be run safely in CI or locally to add new repositories as they are
published.
"""
from __future__ import annotations
import argparse
import json
import os
import re
import sys
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Sequence
from urllib import error, parse, request
API_ROOT = "https://api.github.com"
DEFAULT_TOPICS = [
"azerothcore-module",
"azerothcore-module+ac-premium",
"azerothcore-tools",
"azerothcore-lua",
"azerothcore-sql",
]
# Map topic keywords to module ``type`` values used in the manifest.
TOPIC_TYPE_HINTS = {
"azerothcore-lua": "lua",
"lua": "lua",
"azerothcore-sql": "sql",
"sql": "sql",
"azerothcore-tools": "tool",
"tools": "tool",
}
CATEGORY_BY_TYPE = {
"lua": "scripting",
"sql": "database",
"tool": "tooling",
"data": "data",
"cpp": "uncategorized",
}
USER_AGENT = "acore-compose-module-manifest"
def parse_args(argv: Sequence[str]) -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--manifest",
default="config/module-manifest.json",
help="Path to manifest JSON file (default: %(default)s)",
)
parser.add_argument(
"--topic",
action="append",
default=[],
dest="topics",
help="GitHub topic (or '+' separated topics) to scan. Defaults to core topics if not provided.",
)
parser.add_argument(
"--token",
help="GitHub API token (defaults to $GITHUB_TOKEN or $GITHUB_API_TOKEN)",
)
parser.add_argument(
"--max-pages",
type=int,
default=10,
help="Maximum pages (x100 results) to fetch per topic (default: %(default)s)",
)
parser.add_argument(
"--refresh-existing",
action="store_true",
help="Refresh name/description/type for repos already present in manifest",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Fetch and display the summary without writing to disk",
)
parser.add_argument(
"--log",
action="store_true",
help="Print verbose progress information",
)
parser.add_argument(
"--update-template",
default=".env.template",
help="Update .env.template with missing module variables (default: %(default)s)",
)
parser.add_argument(
"--skip-template",
action="store_true",
help="Skip updating .env.template",
)
return parser.parse_args(argv)
@dataclass
class RepoRecord:
data: dict
topic_expr: str
module_type: str
class GitHubClient:
def __init__(self, token: Optional[str], verbose: bool = False) -> None:
self.token = token
self.verbose = verbose
def _request(self, url: str) -> dict:
req = request.Request(url)
req.add_header("Accept", "application/vnd.github+json")
req.add_header("User-Agent", USER_AGENT)
if self.token:
req.add_header("Authorization", f"Bearer {self.token}")
try:
with request.urlopen(req) as resp:
payload = resp.read().decode("utf-8")
return json.loads(payload)
except error.HTTPError as exc: # pragma: no cover - network failure path
detail = exc.read().decode("utf-8", errors="ignore")
raise RuntimeError(f"GitHub API request failed: {exc.code} {exc.reason}: {detail}") from exc
def search_repositories(self, topic_expr: str, max_pages: int) -> List[dict]:
query = build_topic_query(topic_expr)
results: List[dict] = []
for page in range(1, max_pages + 1):
url = (
f"{API_ROOT}/search/repositories?"
f"q={parse.quote(query)}&per_page=100&page={page}&sort=updated&order=desc"
)
data = self._request(url)
items = data.get("items", [])
if self.verbose:
print(f"Fetched {len(items)} repos for '{topic_expr}' (page {page})")
results.extend(items)
if len(items) < 100:
break
# Avoid secondary rate-limits.
time.sleep(0.5)
return results
def build_topic_query(expr: str) -> str:
parts = [part.strip() for part in expr.split("+") if part.strip()]
if not parts:
raise ValueError("Topic expression must contain at least one topic")
return "+".join(f"topic:{part}" for part in parts)
def guess_module_type(expr: str) -> str:
parts = [part.strip().lower() for part in expr.split("+") if part.strip()]
for part in parts:
hint = TOPIC_TYPE_HINTS.get(part)
if hint:
return hint
return "cpp"
def normalize_repo_url(url: str) -> str:
if url.endswith(".git"):
return url[:-4]
return url
def repo_name_to_key(name: str) -> str:
sanitized = re.sub(r"[^A-Za-z0-9]+", "_", name).strip("_")
sanitized = sanitized.upper()
if not sanitized:
sanitized = "MODULE_UNKNOWN"
if not sanitized.startswith("MODULE_"):
sanitized = f"MODULE_{sanitized}"
return sanitized
def load_manifest(path: str) -> Dict[str, List[dict]]:
manifest_path = os.path.abspath(path)
if not os.path.exists(manifest_path):
return {"modules": []}
try:
with open(manifest_path, "r", encoding="utf-8") as handle:
return json.load(handle)
except json.JSONDecodeError as exc:
raise RuntimeError(f"Unable to parse manifest {path}: {exc}") from exc
def ensure_defaults(entry: dict) -> None:
entry.setdefault("type", "cpp")
entry.setdefault("status", "active")
entry.setdefault("order", 5000)
entry.setdefault("requires", [])
entry.setdefault("post_install_hooks", [])
entry.setdefault("config_cleanup", [])
def update_entry_from_repo(entry: dict, repo: dict, repo_type: str, topic_expr: str, refresh: bool) -> None:
# Only overwrite descriptive fields when refresh is enabled or when they are missing.
if refresh or not entry.get("name"):
entry["name"] = repo.get("name") or entry.get("name")
if refresh or not entry.get("repo"):
entry["repo"] = repo.get("clone_url") or repo.get("html_url", entry.get("repo"))
if refresh or not entry.get("description"):
entry["description"] = repo.get("description") or entry.get("description", "")
if refresh or not entry.get("type"):
entry["type"] = repo_type
if refresh or not entry.get("category"):
entry["category"] = CATEGORY_BY_TYPE.get(repo_type, entry.get("category", "uncategorized"))
ensure_defaults(entry)
notes = entry.get("notes") or ""
tag_note = f"Discovered via GitHub topic '{topic_expr}'"
if tag_note not in notes:
entry["notes"] = (notes + " \n" + tag_note).strip()
def merge_repositories(
manifest: Dict[str, List[dict]],
repos: Iterable[RepoRecord],
refresh_existing: bool,
) -> tuple[int, int]:
modules = manifest.setdefault("modules", [])
by_key = {module.get("key"): module for module in modules if module.get("key")}
by_repo = {
normalize_repo_url(str(module.get("repo", ""))): module
for module in modules
if module.get("repo")
}
added = 0
updated = 0
for record in repos:
repo = record.data
repo_url = normalize_repo_url(repo.get("clone_url") or repo.get("html_url") or "")
existing = by_repo.get(repo_url)
key = repo_name_to_key(repo.get("name", ""))
if not existing:
existing = by_key.get(key)
if not existing:
existing = {
"key": key,
"name": repo.get("name", key),
"repo": repo.get("clone_url") or repo.get("html_url", ""),
"description": repo.get("description") or "",
"type": record.module_type,
"category": CATEGORY_BY_TYPE.get(record.module_type, "uncategorized"),
"notes": "",
}
ensure_defaults(existing)
modules.append(existing)
by_key[key] = existing
if repo_url:
by_repo[repo_url] = existing
added += 1
else:
updated += 1
update_entry_from_repo(existing, repo, record.module_type, record.topic_expr, refresh_existing)
return added, updated
def collect_repositories(
client: GitHubClient, topics: Sequence[str], max_pages: int
) -> List[RepoRecord]:
seen: Dict[str, RepoRecord] = {}
for expr in topics:
repos = client.search_repositories(expr, max_pages)
repo_type = guess_module_type(expr)
for repo in repos:
full_name = repo.get("full_name")
if not full_name:
continue
record = seen.get(full_name)
if record is None:
seen[full_name] = RepoRecord(repo, expr, repo_type)
else:
# Prefer the most specific type (non-default) if available.
if record.module_type == "cpp" and repo_type != "cpp":
record.module_type = repo_type
return list(seen.values())
def update_env_template(manifest_path: str, template_path: str) -> bool:
"""Update .env.template with missing module variables.
Args:
manifest_path: Path to the module manifest JSON file
template_path: Path to .env.template file
Returns:
True if template was updated, False if no changes needed
"""
# Load manifest to get all module keys
manifest = load_manifest(manifest_path)
modules = manifest.get("modules", [])
if not modules:
return False
# Extract all module keys
module_keys = set()
for module in modules:
key = module.get("key")
if key:
module_keys.add(key)
if not module_keys:
return False
# Check if template file exists
template_file = Path(template_path)
if not template_file.exists():
print(f"Warning: .env.template not found at {template_path}")
return False
# Read current template content
try:
current_content = template_file.read_text(encoding="utf-8")
current_lines = current_content.splitlines()
except Exception as exc:
print(f"Error reading .env.template: {exc}")
return False
# Find which module variables are missing
existing_vars = set()
for line in current_lines:
line = line.strip()
if "=" in line and not line.startswith("#"):
var_name = line.split("=", 1)[0].strip()
existing_vars.add(var_name)
missing_vars = module_keys - existing_vars
if not missing_vars:
print("✅ All module variables present in .env.template")
return False
# Add missing variables to the end of the file
print(f"📝 Adding {len(missing_vars)} missing module variable(s) to .env.template:")
# Sort missing vars for consistent output
sorted_missing = sorted(missing_vars)
# Prepare new content
new_lines = current_lines[:]
for var in sorted_missing:
new_lines.append(f"{var}=0")
print(f"{var}=0")
# Write updated content
try:
new_content = "\n".join(new_lines) + "\n"
template_file.write_text(new_content, encoding="utf-8")
print("✅ .env.template updated successfully")
return True
except Exception as exc:
print(f"Error writing .env.template: {exc}")
return False
def main(argv: Sequence[str]) -> int:
args = parse_args(argv)
topics = args.topics or DEFAULT_TOPICS
token = args.token or os.environ.get("GITHUB_TOKEN") or os.environ.get("GITHUB_API_TOKEN")
client = GitHubClient(token, verbose=args.log)
manifest = load_manifest(args.manifest)
repos = collect_repositories(client, topics, args.max_pages)
added, updated = merge_repositories(manifest, repos, args.refresh_existing)
if args.dry_run:
print(f"Discovered {len(repos)} repositories (added={added}, updated={updated})")
return 0
with open(args.manifest, "w", encoding="utf-8") as handle:
json.dump(manifest, handle, indent=2)
handle.write("\n")
print(f"Updated manifest {args.manifest}: added {added}, refreshed {updated}")
# Update .env.template if requested and we have changes
if not args.skip_template and (added > 0 or updated > 0):
template_updated = update_env_template(args.manifest, args.update_template)
if template_updated:
print(f"Updated {args.update_template} with new module variables")
return 0
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))