mirror of
https://github.com/uprightbass360/AzerothCore-RealmMaster.git
synced 2026-01-13 09:07:20 +00:00
cleanup: validation and integrations for importing data
This commit is contained in:
182
scripts/python/report_missing_modules.py
Normal file
182
scripts/python/report_missing_modules.py
Normal file
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate a categorized list of GitHub modules missing from the manifest.
|
||||
|
||||
The script reuses the discovery logic from ``update_module_manifest.py`` to
|
||||
fetch repositories by topic, filters out entries already tracked in
|
||||
``config/module-manifest.json`` and writes the remainder (including type,
|
||||
category, and inferred dependency hints) to a JSON file.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Sequence, Tuple
|
||||
|
||||
from update_module_manifest import ( # type: ignore
|
||||
CATEGORY_BY_TYPE,
|
||||
DEFAULT_TOPICS,
|
||||
GitHubClient,
|
||||
collect_repositories,
|
||||
load_manifest,
|
||||
normalize_repo_url,
|
||||
repo_name_to_key,
|
||||
)
|
||||
|
||||
# heuristics used to surface potential dependency hints
|
||||
DEPENDENCY_KEYWORDS: Tuple[Tuple[str, str], ...] = (
|
||||
("playerbot", "MODULE_PLAYERBOTS"),
|
||||
("ah-bot", "MODULE_PLAYERBOTS"),
|
||||
("eluna", "MODULE_ELUNA"),
|
||||
)
|
||||
|
||||
# keywords that help categorize entries that should probably stay hidden by default
|
||||
SUPPRESSION_KEYWORDS: Tuple[Tuple[str, str], ...] = (
|
||||
("virtual machine", "vm"),
|
||||
(" vm ", "vm"),
|
||||
(" docker", "docker"),
|
||||
("container", "docker"),
|
||||
("vagrant", "vagrant"),
|
||||
("ansible", "automation"),
|
||||
("terraform", "automation"),
|
||||
("client", "client-distribution"),
|
||||
("launcher", "client-distribution"),
|
||||
)
|
||||
|
||||
|
||||
def parse_args(argv: Sequence[str]) -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
"--manifest",
|
||||
default="config/module-manifest.json",
|
||||
help="Path to module manifest JSON (default: %(default)s)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
default="missing-modules.json",
|
||||
help="Path to write the missing-module report JSON (default: %(default)s)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--topic",
|
||||
action="append",
|
||||
default=[],
|
||||
dest="topics",
|
||||
help="GitHub topic (or '+' expression) to scan (defaults to built-in list).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-pages",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Maximum pages (x100 results) to fetch per topic (default: %(default)s)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--token",
|
||||
help="GitHub API token (defaults to $GITHUB_TOKEN or $GITHUB_API_TOKEN)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--log",
|
||||
action="store_true",
|
||||
help="Print verbose progress information",
|
||||
)
|
||||
return parser.parse_args(argv)
|
||||
|
||||
|
||||
def implied_dependencies(module_type: str, text: str) -> List[str]:
|
||||
deps: List[str] = []
|
||||
if module_type == "lua":
|
||||
deps.append("MODULE_ELUNA")
|
||||
normalized = text.lower()
|
||||
for keyword, dep in DEPENDENCY_KEYWORDS:
|
||||
if keyword in normalized and dep not in deps:
|
||||
deps.append(dep)
|
||||
return deps
|
||||
|
||||
|
||||
def suppression_flags(category: str, text: str) -> List[str]:
|
||||
flags: List[str] = []
|
||||
if category == "tooling":
|
||||
flags.append("tooling")
|
||||
normalized = text.lower()
|
||||
for keyword, flag in SUPPRESSION_KEYWORDS:
|
||||
if keyword in normalized and flag not in flags:
|
||||
flags.append(flag)
|
||||
return flags
|
||||
|
||||
|
||||
def make_missing_entries(
|
||||
manifest_modules: List[dict],
|
||||
repos: Iterable,
|
||||
) -> List[dict]:
|
||||
by_key: Dict[str, dict] = {module.get("key"): module for module in manifest_modules if module.get("key")}
|
||||
by_repo: Dict[str, dict] = {
|
||||
normalize_repo_url(str(module.get("repo", ""))): module
|
||||
for module in manifest_modules
|
||||
if module.get("repo")
|
||||
}
|
||||
missing: List[dict] = []
|
||||
|
||||
for record in repos:
|
||||
repo = record.data
|
||||
repo_url = normalize_repo_url(repo.get("clone_url") or repo.get("html_url") or "")
|
||||
existing = by_repo.get(repo_url)
|
||||
key = repo_name_to_key(repo.get("name", ""))
|
||||
if not existing:
|
||||
existing = by_key.get(key)
|
||||
if existing:
|
||||
continue
|
||||
module_type = record.module_type
|
||||
category = CATEGORY_BY_TYPE.get(module_type, "uncategorized")
|
||||
description = repo.get("description") or ""
|
||||
combined_text = " ".join(
|
||||
filter(
|
||||
None,
|
||||
[
|
||||
repo.get("full_name"),
|
||||
description,
|
||||
" ".join(repo.get("topics") or []),
|
||||
],
|
||||
)
|
||||
)
|
||||
entry = {
|
||||
"key": key,
|
||||
"repo_name": repo.get("full_name"),
|
||||
"topic": record.topic_expr,
|
||||
"repo_url": repo.get("html_url") or repo.get("clone_url"),
|
||||
"description": description,
|
||||
"topics": repo.get("topics") or [],
|
||||
"type": module_type,
|
||||
"category": category,
|
||||
"implied_dependencies": implied_dependencies(module_type, combined_text),
|
||||
"flags": suppression_flags(category, combined_text),
|
||||
}
|
||||
missing.append(entry)
|
||||
missing.sort(key=lambda item: item["key"])
|
||||
return missing
|
||||
|
||||
|
||||
def main(argv: Sequence[str]) -> int:
|
||||
args = parse_args(argv)
|
||||
topics = args.topics or DEFAULT_TOPICS
|
||||
token = args.token or os.environ.get("GITHUB_TOKEN") or os.environ.get("GITHUB_API_TOKEN")
|
||||
if not token:
|
||||
print(
|
||||
"Warning: no GitHub token provided, falling back to anonymous rate limit",
|
||||
file=sys.stderr,
|
||||
)
|
||||
client = GitHubClient(token, verbose=args.log)
|
||||
|
||||
manifest = load_manifest(args.manifest)
|
||||
repos = collect_repositories(client, topics, args.max_pages)
|
||||
missing = make_missing_entries(manifest.get("modules", []), repos)
|
||||
|
||||
output_path = Path(args.output)
|
||||
output_path.write_text(json.dumps(missing, indent=2))
|
||||
print(f"Wrote {len(missing)} entries to {output_path}")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main(sys.argv[1:]))
|
||||
Reference in New Issue
Block a user