refactor module db importing

2026-02-04 03:13:50 +00:00 · 2025-11-16 01:32:41 -05:00
parent 05e486ae4f
commit ea908dbbcf
15 changed files with 2120 additions and 31 deletions
--- a/scripts/python/pycache/modules.cpython-312.pyc
+++ b/scripts/python/pycache/modules.cpython-312.pyc
--- a/scripts/python/pycache/update_module_manifest.cpython-312.pyc
+++ b/scripts/python/pycache/update_module_manifest.cpython-312.pyc
--- a/scripts/python/modules.py
+++ b/scripts/python/modules.py
@@ -82,6 +82,64 @@ def load_manifest(manifest_path: Path) -> List[Dict[str, object]]:
    return validated


+def discover_sql_files(module_path: Path, module_name: str) -> Dict[str, List[str]]:
+    """
+    Scan a module directory for SQL files, grouped by target database.
+
+    For every database type, ``*.sql`` files are collected (non-recursively)
+    from these directories below ``<module_path>/data/sql``, in this order:
+    ``base/<db>``, ``updates/<db>``, ``custom/<db>`` and finally the legacy
+    ``<db>`` directory directly below ``data/sql``.  Both the underscore
+    (``db_world``) and the hyphen (``db-world``) spelling are accepted.
+    Files within one directory are returned in sorted order.
+
+    Args:
+        module_path: Root directory of the module.
+        module_name: Name of the module.  Not used by the scan itself; kept
+            so existing callers keep working.
+
+    Returns:
+        Dict mapping the canonical database type to a list of SQL file
+        paths, given as strings relative to ``module_path``.  Database
+        types without any SQL file are omitted.
+        Example: {
+            'db_auth': ['data/sql/db-auth/file1.sql', ...],
+            'db_world': ['data/sql/db-world/file2.sql', ...],
+            'db_characters': ['data/sql/db-characters/file3.sql', ...]
+        }
+    """
+    sql_files: Dict[str, List[str]] = {}
+    sql_base = module_path / 'data' / 'sql'
+
+    if not sql_base.is_dir():
+        return sql_files
+
+    # Map to support both underscore and hyphen naming conventions
+    db_types = {
+        'db_auth': ['db_auth', 'db-auth'],
+        'db_world': ['db_world', 'db-world'],
+        'db_characters': ['db_characters', 'db-characters'],
+        'db_playerbots': ['db_playerbots', 'db-playerbots']
+    }
+
+    # Parent directories searched for each database type, in result order.
+    # The last entry is the legacy layout with the db-type directories
+    # directly below data/sql (used by many modules).
+    search_roots = (
+        sql_base / 'base',
+        sql_base / 'updates',
+        sql_base / 'custom',
+        sql_base,
+    )
+
+    for canonical_name, variants in db_types.items():
+        for search_root in search_roots:
+            for variant in variants:
+                candidate_dir = search_root / variant
+                if not candidate_dir.is_dir():
+                    continue
+                # glob() yields entries in arbitrary filesystem order; sort so
+                # the result is reproducible across runs and machines.
+                for sql_file in sorted(candidate_dir.glob('*.sql')):
+                    sql_files.setdefault(canonical_name, []).append(
+                        str(sql_file.relative_to(module_path))
+                    )
+
+    return sql_files
+
+
@dataclass
 class ModuleState:
    key: str
@@ -103,6 +161,7 @@ class ModuleState:
    dependency_issues: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)
+    sql_files: Dict[str, List[str]] = field(default_factory=dict)

    @property
    def blocked(self) -> bool:
@@ -340,6 +399,30 @@ def write_outputs(state: ModuleCollectionState, output_dir: Path) -> None:
        encoding="utf-8",
    )

+    # Discover SQL files for all modules in output directory
+    for module in state.modules:
+        module_path = output_dir / module.name
+        if module_path.exists():
+            module.sql_files = discover_sql_files(module_path, module.name)
+
+    # Generate SQL manifest for enabled modules with SQL files
+    sql_manifest = {
+        "modules": [
+            {
+                "name": module.name,
+                "key": module.key,
+                "sql_files": module.sql_files
+            }
+            for module in state.enabled_modules()
+            if module.sql_files
+        ]
+    }
+    sql_manifest_path = output_dir / ".sql-manifest.json"
+    sql_manifest_path.write_text(
+        json.dumps(sql_manifest, indent=2) + "\n",
+        encoding="utf-8",
+    )
+

 def print_list(state: ModuleCollectionState, selector: str) -> None:
    if selector == "compile":
--- a/scripts/python/update_module_manifest.py
+++ b/scripts/python/update_module_manifest.py
@@ -0,0 +1,298 @@
+#!/usr/bin/env python3
+"""Generate or update config/module-manifest.json from GitHub topics.
+
+The script queries the GitHub Search API for repositories tagged with
+AzerothCore-specific topics (for example ``azerothcore-module`` or
+``azerothcore-lua``) and merges the discovered projects into the existing
+module manifest.  It intentionally keeps all user-defined fields intact so the
+script can be run safely in CI or locally to add new repositories as they are
+published.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import sys
+import time
+from dataclasses import dataclass
+from typing import Dict, Iterable, List, Optional, Sequence
+from urllib import error, parse, request
+
+# Base URL of the GitHub REST API.
+API_ROOT = "https://api.github.com"
+
+# Value sent in the User-Agent header of every API request.
+USER_AGENT = "acore-compose-module-manifest"
+
+# Topic expressions scanned when no --topic option is given.  A "+" inside an
+# expression separates several topics belonging to the same search.
+DEFAULT_TOPICS = [
+    "azerothcore-module",
+    "azerothcore-module+ac-premium",
+    "azerothcore-tools",
+    "azerothcore-lua",
+    "azerothcore-sql",
+]
+
+# Topic keyword -> module ``type`` value used in the manifest.  Topics that
+# are not listed here fall back to the default type chosen by the caller.
+TOPIC_TYPE_HINTS = {
+    # Lua scripts
+    "azerothcore-lua": "lua",
+    "lua": "lua",
+    # SQL-only modules
+    "azerothcore-sql": "sql",
+    "sql": "sql",
+    # Tooling
+    "azerothcore-tools": "tool",
+    "tools": "tool",
+}
+
+# Module ``type`` -> default manifest ``category``.
+CATEGORY_BY_TYPE = {
+    "lua": "scripting",
+    "sql": "database",
+    "tool": "tooling",
+    "data": "data",
+    "cpp": "uncategorized",
+}
+
+
+def parse_args(argv: Sequence[str]) -> argparse.Namespace:
+    """Parse the command-line options of the manifest updater.
+
+    Args:
+        argv: Command-line arguments without the program name.
+
+    Returns:
+        Namespace with the attributes ``manifest``, ``topics``, ``token``,
+        ``max_pages``, ``refresh_existing``, ``dry_run`` and ``log``.
+    """
+    cli = argparse.ArgumentParser(description=__doc__)
+
+    # Options that take a value.
+    cli.add_argument(
+        "--manifest",
+        default="config/module-manifest.json",
+        help="Path to manifest JSON file (default: %(default)s)",
+    )
+    cli.add_argument(
+        "--topic",
+        dest="topics",
+        action="append",
+        default=[],
+        help="GitHub topic (or '+' separated topics) to scan. Defaults to core topics if not provided.",
+    )
+    cli.add_argument(
+        "--token",
+        help="GitHub API token (defaults to $GITHUB_TOKEN or $GITHUB_API_TOKEN)",
+    )
+    cli.add_argument(
+        "--max-pages",
+        type=int,
+        default=10,
+        help="Maximum pages (x100 results) to fetch per topic (default: %(default)s)",
+    )
+
+    # Plain on/off switches, registered in the order they appear in --help.
+    switches = (
+        ("--refresh-existing", "Refresh name/description/type for repos already present in manifest"),
+        ("--dry-run", "Fetch and display the summary without writing to disk"),
+        ("--log", "Print verbose progress information"),
+    )
+    for flag, help_text in switches:
+        cli.add_argument(flag, action="store_true", help=help_text)
+
+    return cli.parse_args(argv)
+
+
+@dataclass
+class RepoRecord:
+    """A repository found by a topic search, plus how it was found."""
+
+    # Repository object taken from the ``items`` list of a search response.
+    data: dict
+    # Topic expression whose search first returned this repository.
+    topic_expr: str
+    # Manifest ``type`` guessed from a topic expression that matched the repo.
+    module_type: str
+
+
+class GitHubClient:
+    """Small GitHub REST API client used to search repositories by topic.
+
+    Args:
+        token: Optional API token, sent as a bearer token when given.
+        verbose: Print one progress line per fetched result page.
+        timeout: Seconds to wait for each HTTP request before giving up.
+    """
+
+    # Number of results requested per page.
+    PER_PAGE = 100
+    # The search API serves at most this many results per query, so pages
+    # beyond MAX_SEARCH_RESULTS / PER_PAGE are never requested.
+    MAX_SEARCH_RESULTS = 1000
+
+    def __init__(self, token: Optional[str], verbose: bool = False, timeout: float = 30.0) -> None:
+        self.token = token
+        self.verbose = verbose
+        self.timeout = timeout
+
+    def _request(self, url: str) -> dict:
+        """Fetch *url* and return the decoded JSON body.
+
+        Raises:
+            RuntimeError: If the server answers with an HTTP error status or
+                the request fails at the network level (DNS, connection,
+                timeout).
+        """
+        req = request.Request(url)
+        req.add_header("Accept", "application/vnd.github+json")
+        req.add_header("User-Agent", USER_AGENT)
+        if self.token:
+            req.add_header("Authorization", f"Bearer {self.token}")
+        try:
+            # Without a timeout a stalled connection would block forever.
+            with request.urlopen(req, timeout=self.timeout) as resp:
+                payload = resp.read().decode("utf-8")
+        except error.HTTPError as exc:  # pragma: no cover - network failure path
+            detail = exc.read().decode("utf-8", errors="ignore")
+            raise RuntimeError(f"GitHub API request failed: {exc.code} {exc.reason}: {detail}") from exc
+        except OSError as exc:  # pragma: no cover - network failure path
+            # URLError and socket timeouts are OSError subclasses; report them
+            # the same way as HTTP errors so callers handle one exception type.
+            raise RuntimeError(f"GitHub API request failed: {exc}") from exc
+        return json.loads(payload)
+
+    def search_repositories(self, topic_expr: str, max_pages: int) -> List[dict]:
+        """Return the repositories matching *topic_expr*, most recently updated first.
+
+        Args:
+            topic_expr: One topic, or several topics separated by ``+``.
+            max_pages: Upper bound on the number of result pages to fetch.
+
+        Returns:
+            The ``items`` of all fetched pages, concatenated in page order.
+
+        Raises:
+            ValueError: If *topic_expr* contains no topic.
+            RuntimeError: If an API request fails.
+        """
+        query = build_topic_query(topic_expr)
+        page_limit = min(max_pages, self.MAX_SEARCH_RESULTS // self.PER_PAGE)
+        results: List[dict] = []
+        for page in range(1, page_limit + 1):
+            url = (
+                f"{API_ROOT}/search/repositories?"
+                f"q={parse.quote(query)}&per_page={self.PER_PAGE}&page={page}&sort=updated&order=desc"
+            )
+            data = self._request(url)
+            items = data.get("items", [])
+            if self.verbose:
+                print(f"Fetched {len(items)} repos for '{topic_expr}' (page {page})")
+            results.extend(items)
+            # A short page means this was the last one.
+            if len(items) < self.PER_PAGE:
+                break
+            # Avoid secondary rate-limits.
+            time.sleep(0.5)
+        return results
+
+
+def build_topic_query(expr: str) -> str:
+    """Translate a ``+``-separated topic expression into a search query.
+
+    Every non-empty topic becomes a ``topic:<name>`` qualifier.  The
+    qualifiers are separated by single spaces, so the returned string is the
+    plain (not URL-encoded) query text.
+
+    Args:
+        expr: One topic, or several topics separated by ``+``.
+
+    Returns:
+        The query text, e.g. ``"topic:a topic:b"`` for ``"a+b"``.
+
+    Raises:
+        ValueError: If *expr* contains no topic at all.
+    """
+    topics = [part.strip() for part in expr.split("+") if part.strip()]
+    if not topics:
+        raise ValueError("Topic expression must contain at least one topic")
+    # Join with spaces rather than "+": the query is percent-encoded before it
+    # is put into the URL, and an encoded "+" ("%2B") reaches the server as a
+    # literal plus sign that glues the qualifiers into one malformed term.
+    return " ".join(f"topic:{topic}" for topic in topics)
+
+
+def guess_module_type(expr: str) -> str:
+    """Guess the manifest ``type`` for a ``+``-separated topic expression.
+
+    The topics are checked left to right, case-insensitively, against
+    ``TOPIC_TYPE_HINTS``; the first hint found wins.
+
+    Returns:
+        The hinted type, or ``"cpp"`` when no topic has a hint.
+    """
+    for token in expr.split("+"):
+        topic = token.strip().lower()
+        if not topic:
+            # Empty segments (e.g. from "a++b") carry no information.
+            continue
+        module_type = TOPIC_TYPE_HINTS.get(topic)
+        if module_type:
+            return module_type
+    return "cpp"
+
+
+def normalize_repo_url(url: str) -> str:
+    """Return *url* in a canonical form for comparing repository URLs.
+
+    Surrounding whitespace, trailing slashes and a trailing ``.git`` suffix
+    are removed, so the clone URL and the web URL of the same repository
+    compare equal.  Letter case is left untouched.
+    """
+    normalized = url.strip().rstrip("/")
+    if normalized.endswith(".git"):
+        # Also drop a slash that preceded the suffix (".../repo/.git").
+        normalized = normalized[:-4].rstrip("/")
+    return normalized
+
+
+def repo_name_to_key(name: str) -> str:
+    """Derive a manifest key from a repository name.
+
+    Every run of characters other than ASCII letters and digits becomes a
+    single underscore, leading and trailing underscores are dropped and the
+    result is upper-cased.  Keys always start with ``MODULE_``; a name with
+    no usable characters yields ``MODULE_UNKNOWN``.
+    """
+    key = re.sub(r"[^A-Za-z0-9]+", "_", name).strip("_").upper() or "MODULE_UNKNOWN"
+    return key if key.startswith("MODULE_") else f"MODULE_{key}"
+
+
+def load_manifest(path: str) -> Dict[str, List[dict]]:
+    """Load the module manifest stored at *path*.
+
+    Args:
+        path: Location of the manifest JSON file.
+
+    Returns:
+        The parsed manifest, or ``{"modules": []}`` when the file does not
+        exist yet.
+
+    Raises:
+        RuntimeError: If the file is not valid JSON or its top-level value
+            is not a JSON object.
+    """
+    manifest_path = os.path.abspath(path)
+    # Open directly instead of checking for existence first, so a file that
+    # disappears between the check and the open cannot cause a crash.
+    try:
+        with open(manifest_path, "r", encoding="utf-8") as handle:
+            manifest = json.load(handle)
+    except FileNotFoundError:
+        return {"modules": []}
+    except json.JSONDecodeError as exc:
+        raise RuntimeError(f"Unable to parse manifest {path}: {exc}") from exc
+    if not isinstance(manifest, dict):
+        # Fail here with a clear message rather than later with an unrelated
+        # attribute error when the manifest is treated as a mapping.
+        raise RuntimeError(
+            f"Unable to parse manifest {path}: top-level value must be a JSON object"
+        )
+    return manifest
+
+
+def ensure_defaults(entry: dict) -> None:
+    """Fill in the standard manifest fields that *entry* does not have yet.
+
+    Existing values are never overwritten.  The entry is modified in place.
+    """
+    scalar_defaults = (
+        ("type", "cpp"),
+        ("status", "active"),
+        ("order", 5000),
+    )
+    for field_name, value in scalar_defaults:
+        entry.setdefault(field_name, value)
+    # Each list field gets its own fresh list so entries never share one.
+    for field_name in ("requires", "post_install_hooks", "config_cleanup"):
+        entry.setdefault(field_name, [])
+
+
+def update_entry_from_repo(entry: dict, repo: dict, repo_type: str, topic_expr: str, refresh: bool) -> None:
+    """Copy repository details from a search result into a manifest entry.
+
+    Args:
+        entry: Manifest entry to update in place.
+        repo: Repository object from the search response.
+        repo_type: Module type guessed for the repository.
+        topic_expr: Topic expression that found the repository; recorded in
+            the entry's ``notes``.
+        refresh: When true, descriptive fields are overwritten even if the
+            entry already has a value for them.
+    """
+
+    def should_set(field_name: str) -> bool:
+        # Descriptive fields are only written when asked to refresh or when
+        # the entry has no (non-empty) value yet.
+        return refresh or not entry.get(field_name)
+
+    if should_set("name"):
+        entry["name"] = repo.get("name") or entry.get("name")
+    if should_set("repo"):
+        entry["repo"] = repo.get("clone_url") or repo.get("html_url", entry.get("repo"))
+    if should_set("description"):
+        entry["description"] = repo.get("description") or entry.get("description", "")
+    if should_set("type"):
+        entry["type"] = repo_type
+    if should_set("category"):
+        fallback_category = entry.get("category", "uncategorized")
+        entry["category"] = CATEGORY_BY_TYPE.get(repo_type, fallback_category)
+
+    ensure_defaults(entry)
+
+    # Record where the entry came from, once per topic expression.
+    current_notes = entry.get("notes") or ""
+    provenance = f"Discovered via GitHub topic '{topic_expr}'"
+    if provenance not in current_notes:
+        entry["notes"] = " \n".join((current_notes, provenance)).strip()
+
+
+def merge_repositories(
+    manifest: Dict[str, List[dict]],
+    repos: Iterable[RepoRecord],
+    refresh_existing: bool,
+) -> tuple[int, int]:
+    """Merge discovered repositories into *manifest* in place.
+
+    A repository is matched to an existing module first by normalized
+    repository URL and then by the key derived from its name.  Unmatched
+    repositories are appended as new modules.
+
+    Args:
+        manifest: Manifest whose ``modules`` list is extended and updated.
+        repos: Repositories to merge.
+        refresh_existing: Passed on as the refresh flag when updating entries.
+
+    Returns:
+        ``(added, updated)``: the number of new modules and the number of
+        repositories that matched an existing module.
+    """
+    modules = manifest.setdefault("modules", [])
+
+    # Lookup tables over the modules already present in the manifest.
+    by_key = {}
+    by_repo = {}
+    for module in modules:
+        if module.get("key"):
+            by_key[module.get("key")] = module
+        if module.get("repo"):
+            by_repo[normalize_repo_url(str(module.get("repo", "")))] = module
+
+    added = updated = 0
+    for record in repos:
+        repo = record.data
+        repo_url = normalize_repo_url(repo.get("clone_url") or repo.get("html_url") or "")
+        key = repo_name_to_key(repo.get("name", ""))
+
+        entry = by_repo.get(repo_url) or by_key.get(key)
+        if entry:
+            updated += 1
+        else:
+            module_type = record.module_type
+            entry = {
+                "key": key,
+                "name": repo.get("name", key),
+                "repo": repo.get("clone_url") or repo.get("html_url", ""),
+                "description": repo.get("description") or "",
+                "type": module_type,
+                "category": CATEGORY_BY_TYPE.get(module_type, "uncategorized"),
+                "notes": "",
+            }
+            ensure_defaults(entry)
+            modules.append(entry)
+            # Register the new module so later records can match it too.
+            by_key[key] = entry
+            if repo_url:
+                by_repo[repo_url] = entry
+            added += 1
+
+        update_entry_from_repo(entry, repo, record.module_type, record.topic_expr, refresh_existing)
+
+    return added, updated
+
+
+def collect_repositories(
+    client: GitHubClient, topics: Sequence[str], max_pages: int
+) -> List[RepoRecord]:
+    """Search every topic expression and return each repository once.
+
+    Repositories are identified by their ``full_name``; results without one
+    are ignored.  A repository keeps the data and topic expression of the
+    search that returned it first.
+
+    Args:
+        client: Client used to run the searches.
+        topics: Topic expressions to search for.
+        max_pages: Page limit passed to every search.
+
+    Returns:
+        One record per repository, in order of first appearance.
+    """
+    records: Dict[str, RepoRecord] = {}
+    for topic_expr in topics:
+        found = client.search_repositories(topic_expr, max_pages)
+        module_type = guess_module_type(topic_expr)
+        for repo in found:
+            full_name = repo.get("full_name")
+            if not full_name:
+                continue
+            known = records.get(full_name)
+            if known is None:
+                records[full_name] = RepoRecord(repo, topic_expr, module_type)
+                continue
+            # Prefer the most specific type (non-default) if available.
+            if known.module_type == "cpp" and module_type != "cpp":
+                known.module_type = module_type
+    return list(records.values())
+
+
+def main(argv: Sequence[str]) -> int:
+    """Run the manifest update.
+
+    Loads the manifest, searches GitHub for the requested topics, merges the
+    results and, unless ``--dry-run`` is given, writes the manifest back.
+
+    Args:
+        argv: Command-line arguments without the program name.
+
+    Returns:
+        Process exit status: 0 on success, 1 when the manifest cannot be
+        loaded or the repository search fails.
+    """
+    args = parse_args(argv)
+    topics = args.topics or DEFAULT_TOPICS
+    token = args.token or os.environ.get("GITHUB_TOKEN") or os.environ.get("GITHUB_API_TOKEN")
+    client = GitHubClient(token, verbose=args.log)
+
+    try:
+        manifest = load_manifest(args.manifest)
+        repos = collect_repositories(client, topics, args.max_pages)
+    except (RuntimeError, ValueError, OSError) as exc:
+        # Expected failures (API or network error, unreadable manifest, empty
+        # topic expression) get a one-line message instead of a traceback.
+        print(f"error: {exc}", file=sys.stderr)
+        return 1
+
+    added, updated = merge_repositories(manifest, repos, args.refresh_existing)
+    if args.dry_run:
+        print(f"Discovered {len(repos)} repositories (added={added}, updated={updated})")
+        return 0
+
+    # A missing manifest is a supported starting point, so its directory may
+    # not exist yet either.
+    os.makedirs(os.path.dirname(os.path.abspath(args.manifest)), exist_ok=True)
+    with open(args.manifest, "w", encoding="utf-8") as handle:
+        json.dump(manifest, handle, indent=2)
+        handle.write("\n")
+
+    print(f"Updated manifest {args.manifest}: added {added}, refreshed {updated}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))