refactor module db importing

This commit is contained in:
uprightbass360
2025-11-16 01:32:41 -05:00
parent 05e486ae4f
commit ea908dbbcf
15 changed files with 2120 additions and 31 deletions

Binary file not shown.

View File

@@ -82,6 +82,64 @@ def load_manifest(manifest_path: Path) -> List[Dict[str, object]]:
return validated
def discover_sql_files(module_path: Path, module_name: str) -> Dict[str, List[str]]:
"""
Scan module for SQL files.
Returns:
Dict mapping database type to list of SQL file paths
Example: {
'db_auth': [Path('file1.sql'), ...],
'db_world': [Path('file2.sql'), ...],
'db_characters': [Path('file3.sql'), ...]
}
"""
sql_files: Dict[str, List[str]] = {}
sql_base = module_path / 'data' / 'sql'
if not sql_base.exists():
return sql_files
# Map to support both underscore and hyphen naming conventions
db_types = {
'db_auth': ['db_auth', 'db-auth'],
'db_world': ['db_world', 'db-world'],
'db_characters': ['db_characters', 'db-characters'],
'db_playerbots': ['db_playerbots', 'db-playerbots']
}
for canonical_name, variants in db_types.items():
# Check base/ with all variants
for variant in variants:
base_dir = sql_base / 'base' / variant
if base_dir.exists():
for sql_file in base_dir.glob('*.sql'):
sql_files.setdefault(canonical_name, []).append(str(sql_file.relative_to(module_path)))
# Check updates/ with all variants
for variant in variants:
updates_dir = sql_base / 'updates' / variant
if updates_dir.exists():
for sql_file in updates_dir.glob('*.sql'):
sql_files.setdefault(canonical_name, []).append(str(sql_file.relative_to(module_path)))
# Check custom/ with all variants
for variant in variants:
custom_dir = sql_base / 'custom' / variant
if custom_dir.exists():
for sql_file in custom_dir.glob('*.sql'):
sql_files.setdefault(canonical_name, []).append(str(sql_file.relative_to(module_path)))
# ALSO check direct db-type directories (legacy format used by many modules)
for variant in variants:
direct_dir = sql_base / variant
if direct_dir.exists():
for sql_file in direct_dir.glob('*.sql'):
sql_files.setdefault(canonical_name, []).append(str(sql_file.relative_to(module_path)))
return sql_files
@dataclass
class ModuleState:
key: str
@@ -103,6 +161,7 @@ class ModuleState:
dependency_issues: List[str] = field(default_factory=list)
warnings: List[str] = field(default_factory=list)
errors: List[str] = field(default_factory=list)
sql_files: Dict[str, List[str]] = field(default_factory=dict)
@property
def blocked(self) -> bool:
@@ -340,6 +399,30 @@ def write_outputs(state: ModuleCollectionState, output_dir: Path) -> None:
encoding="utf-8",
)
# Discover SQL files for all modules in output directory
for module in state.modules:
module_path = output_dir / module.name
if module_path.exists():
module.sql_files = discover_sql_files(module_path, module.name)
# Generate SQL manifest for enabled modules with SQL files
sql_manifest = {
"modules": [
{
"name": module.name,
"key": module.key,
"sql_files": module.sql_files
}
for module in state.enabled_modules()
if module.sql_files
]
}
sql_manifest_path = output_dir / ".sql-manifest.json"
sql_manifest_path.write_text(
json.dumps(sql_manifest, indent=2) + "\n",
encoding="utf-8",
)
def print_list(state: ModuleCollectionState, selector: str) -> None:
if selector == "compile":

View File

@@ -0,0 +1,298 @@
#!/usr/bin/env python3
"""Generate or update config/module-manifest.json from GitHub topics.
The script queries the GitHub Search API for repositories tagged with
AzerothCore-specific topics (for example ``azerothcore-module`` or
``azerothcore-lua``) and merges the discovered projects into the existing
module manifest. It intentionally keeps all user-defined fields intact so the
script can be run safely in CI or locally to add new repositories as they are
published.
"""
from __future__ import annotations
import argparse
import json
import os
import re
import sys
import time
from dataclasses import dataclass
from typing import Dict, Iterable, List, Optional, Sequence
from urllib import error, parse, request
API_ROOT = "https://api.github.com"
DEFAULT_TOPICS = [
"azerothcore-module",
"azerothcore-module+ac-premium",
"azerothcore-tools",
"azerothcore-lua",
"azerothcore-sql",
]
# Map topic keywords to module ``type`` values used in the manifest.
TOPIC_TYPE_HINTS = {
"azerothcore-lua": "lua",
"lua": "lua",
"azerothcore-sql": "sql",
"sql": "sql",
"azerothcore-tools": "tool",
"tools": "tool",
}
CATEGORY_BY_TYPE = {
"lua": "scripting",
"sql": "database",
"tool": "tooling",
"data": "data",
"cpp": "uncategorized",
}
USER_AGENT = "acore-compose-module-manifest"
def parse_args(argv: Sequence[str]) -> argparse.Namespace:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--manifest",
default="config/module-manifest.json",
help="Path to manifest JSON file (default: %(default)s)",
)
parser.add_argument(
"--topic",
action="append",
default=[],
dest="topics",
help="GitHub topic (or '+' separated topics) to scan. Defaults to core topics if not provided.",
)
parser.add_argument(
"--token",
help="GitHub API token (defaults to $GITHUB_TOKEN or $GITHUB_API_TOKEN)",
)
parser.add_argument(
"--max-pages",
type=int,
default=10,
help="Maximum pages (x100 results) to fetch per topic (default: %(default)s)",
)
parser.add_argument(
"--refresh-existing",
action="store_true",
help="Refresh name/description/type for repos already present in manifest",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Fetch and display the summary without writing to disk",
)
parser.add_argument(
"--log",
action="store_true",
help="Print verbose progress information",
)
return parser.parse_args(argv)
@dataclass
class RepoRecord:
data: dict
topic_expr: str
module_type: str
class GitHubClient:
def __init__(self, token: Optional[str], verbose: bool = False) -> None:
self.token = token
self.verbose = verbose
def _request(self, url: str) -> dict:
req = request.Request(url)
req.add_header("Accept", "application/vnd.github+json")
req.add_header("User-Agent", USER_AGENT)
if self.token:
req.add_header("Authorization", f"Bearer {self.token}")
try:
with request.urlopen(req) as resp:
payload = resp.read().decode("utf-8")
return json.loads(payload)
except error.HTTPError as exc: # pragma: no cover - network failure path
detail = exc.read().decode("utf-8", errors="ignore")
raise RuntimeError(f"GitHub API request failed: {exc.code} {exc.reason}: {detail}") from exc
def search_repositories(self, topic_expr: str, max_pages: int) -> List[dict]:
query = build_topic_query(topic_expr)
results: List[dict] = []
for page in range(1, max_pages + 1):
url = (
f"{API_ROOT}/search/repositories?"
f"q={parse.quote(query)}&per_page=100&page={page}&sort=updated&order=desc"
)
data = self._request(url)
items = data.get("items", [])
if self.verbose:
print(f"Fetched {len(items)} repos for '{topic_expr}' (page {page})")
results.extend(items)
if len(items) < 100:
break
# Avoid secondary rate-limits.
time.sleep(0.5)
return results
def build_topic_query(expr: str) -> str:
parts = [part.strip() for part in expr.split("+") if part.strip()]
if not parts:
raise ValueError("Topic expression must contain at least one topic")
return "+".join(f"topic:{part}" for part in parts)
def guess_module_type(expr: str) -> str:
parts = [part.strip().lower() for part in expr.split("+") if part.strip()]
for part in parts:
hint = TOPIC_TYPE_HINTS.get(part)
if hint:
return hint
return "cpp"
def normalize_repo_url(url: str) -> str:
if url.endswith(".git"):
return url[:-4]
return url
def repo_name_to_key(name: str) -> str:
sanitized = re.sub(r"[^A-Za-z0-9]+", "_", name).strip("_")
sanitized = sanitized.upper()
if not sanitized:
sanitized = "MODULE_UNKNOWN"
if not sanitized.startswith("MODULE_"):
sanitized = f"MODULE_{sanitized}"
return sanitized
def load_manifest(path: str) -> Dict[str, List[dict]]:
manifest_path = os.path.abspath(path)
if not os.path.exists(manifest_path):
return {"modules": []}
try:
with open(manifest_path, "r", encoding="utf-8") as handle:
return json.load(handle)
except json.JSONDecodeError as exc:
raise RuntimeError(f"Unable to parse manifest {path}: {exc}") from exc
def ensure_defaults(entry: dict) -> None:
entry.setdefault("type", "cpp")
entry.setdefault("status", "active")
entry.setdefault("order", 5000)
entry.setdefault("requires", [])
entry.setdefault("post_install_hooks", [])
entry.setdefault("config_cleanup", [])
def update_entry_from_repo(entry: dict, repo: dict, repo_type: str, topic_expr: str, refresh: bool) -> None:
# Only overwrite descriptive fields when refresh is enabled or when they are missing.
if refresh or not entry.get("name"):
entry["name"] = repo.get("name") or entry.get("name")
if refresh or not entry.get("repo"):
entry["repo"] = repo.get("clone_url") or repo.get("html_url", entry.get("repo"))
if refresh or not entry.get("description"):
entry["description"] = repo.get("description") or entry.get("description", "")
if refresh or not entry.get("type"):
entry["type"] = repo_type
if refresh or not entry.get("category"):
entry["category"] = CATEGORY_BY_TYPE.get(repo_type, entry.get("category", "uncategorized"))
ensure_defaults(entry)
notes = entry.get("notes") or ""
tag_note = f"Discovered via GitHub topic '{topic_expr}'"
if tag_note not in notes:
entry["notes"] = (notes + " \n" + tag_note).strip()
def merge_repositories(
manifest: Dict[str, List[dict]],
repos: Iterable[RepoRecord],
refresh_existing: bool,
) -> tuple[int, int]:
modules = manifest.setdefault("modules", [])
by_key = {module.get("key"): module for module in modules if module.get("key")}
by_repo = {
normalize_repo_url(str(module.get("repo", ""))): module
for module in modules
if module.get("repo")
}
added = 0
updated = 0
for record in repos:
repo = record.data
repo_url = normalize_repo_url(repo.get("clone_url") or repo.get("html_url") or "")
existing = by_repo.get(repo_url)
key = repo_name_to_key(repo.get("name", ""))
if not existing:
existing = by_key.get(key)
if not existing:
existing = {
"key": key,
"name": repo.get("name", key),
"repo": repo.get("clone_url") or repo.get("html_url", ""),
"description": repo.get("description") or "",
"type": record.module_type,
"category": CATEGORY_BY_TYPE.get(record.module_type, "uncategorized"),
"notes": "",
}
ensure_defaults(existing)
modules.append(existing)
by_key[key] = existing
if repo_url:
by_repo[repo_url] = existing
added += 1
else:
updated += 1
update_entry_from_repo(existing, repo, record.module_type, record.topic_expr, refresh_existing)
return added, updated
def collect_repositories(
client: GitHubClient, topics: Sequence[str], max_pages: int
) -> List[RepoRecord]:
seen: Dict[str, RepoRecord] = {}
for expr in topics:
repos = client.search_repositories(expr, max_pages)
repo_type = guess_module_type(expr)
for repo in repos:
full_name = repo.get("full_name")
if not full_name:
continue
record = seen.get(full_name)
if record is None:
seen[full_name] = RepoRecord(repo, expr, repo_type)
else:
# Prefer the most specific type (non-default) if available.
if record.module_type == "cpp" and repo_type != "cpp":
record.module_type = repo_type
return list(seen.values())
def main(argv: Sequence[str]) -> int:
args = parse_args(argv)
topics = args.topics or DEFAULT_TOPICS
token = args.token or os.environ.get("GITHUB_TOKEN") or os.environ.get("GITHUB_API_TOKEN")
client = GitHubClient(token, verbose=args.log)
manifest = load_manifest(args.manifest)
repos = collect_repositories(client, topics, args.max_pages)
added, updated = merge_repositories(manifest, repos, args.refresh_existing)
if args.dry_run:
print(f"Discovered {len(repos)} repositories (added={added}, updated={updated})")
return 0
with open(args.manifest, "w", encoding="utf-8") as handle:
json.dump(manifest, handle, indent=2)
handle.write("\n")
print(f"Updated manifest {args.manifest}: added {added}, refreshed {updated}")
return 0
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))