smarttools/scripts/sync_to_db.py

109 lines
3.2 KiB
Python

#!/usr/bin/env python3
"""Sync registry tools into the database.
Usage: python scripts/sync_to_db.py /path/to/registry/repo
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Dict, Any
import yaml
# Allow running from repo root
sys.path.append(str(Path(__file__).resolve().parents[1] / "src"))
from smarttools.registry.db import connect_db, query_one
from smarttools.registry.sync import ensure_publisher, normalize_tags
def load_yaml(path: Path) -> Dict[str, Any]:
    """Read a UTF-8 YAML file and return its parsed content.

    Args:
        path: Location of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file, or an empty dict
        when that result is falsy (for example an empty document).
    """
    raw_text = path.read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text)
    if not parsed:
        # Empty documents load as None; hand callers a dict they can .get() on.
        return {}
    return parsed
def _parse_tool_config(config_text: str) -> tuple[Dict[str, Any], str]:
    """Parse one ``config.yaml`` document and extract its version string.

    Args:
        config_text: Raw YAML text of a tool's ``config.yaml``.

    Returns:
        A ``(data, version)`` pair: the parsed mapping and its stripped,
        non-empty ``version`` string.

    Raises:
        ValueError: If the text is not valid YAML, the top-level value is not
            a mapping, or ``version`` is missing, empty, or not a string.
    """
    try:
        data = yaml.safe_load(config_text) or {}
    except yaml.YAMLError as exc:
        raise ValueError(f"invalid YAML: {exc}") from exc
    if not isinstance(data, dict):
        raise ValueError("top-level YAML value is not a mapping")

    raw_version = data.get("version")
    if raw_version is not None and not isinstance(raw_version, str):
        # An unquoted `version: 1.10` is parsed as the float 1.1, so coercing
        # with str() could silently register the wrong version. Reject it.
        raise ValueError(
            f"version must be a quoted string, got {type(raw_version).__name__}"
        )
    version = (raw_version or "").strip()
    if not version:
        raise ValueError("missing or empty version")
    return data, version


def sync_repo(repo_root: Path) -> int:
    """Insert every not-yet-registered tool version found under ``repo_root/tools``.

    Each ``tools/<owner>/<name>/config.yaml`` is parsed; when no row with the
    same owner, name and version exists in the ``tools`` table, a new row is
    inserted together with the raw config text and the sibling ``README.md``
    (empty string when there is none). Configs that cannot be read or parsed,
    or that lack a usable version, are reported on stderr and counted as
    skipped instead of aborting the run. Totals are printed to stdout.

    All inserts are committed once after the loop; if an exception escapes the
    loop the commit is never reached. The connection is always closed.

    Args:
        repo_root: Root of the registry repository checkout.

    Returns:
        ``1`` when ``repo_root/tools`` is not a directory, otherwise ``0``.
    """
    tools_root = repo_root / "tools"
    # is_dir() rather than exists(): a stray regular file named "tools" is
    # just as unusable as a missing directory.
    if not tools_root.is_dir():
        print(f"Missing tools directory at {tools_root}")
        return 1

    conn = connect_db()
    synced = 0
    skipped = 0
    try:
        # Sorted so that processing order (and any error output) is
        # reproducible between runs; glob order depends on the filesystem.
        for config_path in sorted(tools_root.glob("*/*/config.yaml")):
            tool_dir = config_path.parent
            owner = tool_dir.parent.name
            name = tool_dir.name

            try:
                # Parse the same text that gets stored, so the saved
                # config_yaml always matches the extracted metadata.
                config_text = config_path.read_text(encoding="utf-8")
                data, version = _parse_tool_config(config_text)
            except (OSError, ValueError) as exc:
                # UnicodeDecodeError is a ValueError subclass, so undecodable
                # files land here too.
                print(f"Skipping {owner}/{name}: {exc}", file=sys.stderr)
                skipped += 1
                continue

            existing = query_one(
                conn,
                "SELECT id FROM tools WHERE owner = ? AND name = ? AND version = ?",
                [owner, name, version],
            )
            if existing:
                skipped += 1
                continue

            readme_path = tool_dir / "README.md"
            readme_text = (
                readme_path.read_text(encoding="utf-8") if readme_path.exists() else ""
            )

            # Tolerate a missing or malformed `registry` section.
            registry_meta = data.get("registry")
            if not isinstance(registry_meta, dict):
                registry_meta = {}
            try:
                downloads = int(registry_meta.get("downloads") or 0)
            except (TypeError, ValueError):
                downloads = 0

            publisher_id = ensure_publisher(conn, owner)
            tags = normalize_tags(data.get("tags"))
            conn.execute(
                """
                INSERT INTO tools (
                    owner, name, version, description, category, tags, config_yaml, readme,
                    publisher_id, deprecated, deprecated_message, replacement, downloads, published_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                [
                    owner,
                    name,
                    version,
                    data.get("description"),
                    data.get("category"),
                    tags,
                    config_text,
                    readme_text,
                    publisher_id,
                    int(bool(data.get("deprecated"))),
                    data.get("deprecated_message"),
                    data.get("replacement"),
                    downloads,
                    registry_meta.get("published_at"),
                ],
            )
            synced += 1
        conn.commit()
    finally:
        conn.close()

    print(f"Synced: {synced}")
    print(f"Skipped (existing/invalid): {skipped}")
    return 0
def main() -> int:
    """Run the sync for the repository path given as the first CLI argument.

    Returns:
        ``1`` after printing the usage line when no argument was supplied,
        otherwise whatever ``sync_repo`` returns for that path.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python scripts/sync_to_db.py /path/to/registry/repo")
        return 1
    # Only the first positional argument is used; any extras are ignored.
    return sync_repo(Path(cli_args[0]))
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())