# CmdForge/src/cmdforge/registry/sync.py
"""Registry repository sync and webhook processing."""
from __future__ import annotations
import hashlib
import hmac
import json
import os
import shutil
import subprocess
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, Tuple
import yaml
from .db import connect_db, query_one
def get_repo_dir() -> Path:
    """Return the local checkout directory for the registry repository.

    Overridable via the CMDFORGE_REGISTRY_REPO_DIR environment variable.
    """
    fallback = Path.home() / ".cmdforge" / "registry" / "repo"
    return Path(os.environ.get("CMDFORGE_REGISTRY_REPO_DIR", fallback))
def get_repo_url() -> str:
    """Return the git URL of the registry repository (env-overridable)."""
    default_url = "https://gitea.brrd.tech/rob/CmdForge-Registry.git"
    return os.environ.get("CMDFORGE_REGISTRY_REPO_URL", default_url)
def get_repo_branch() -> str:
    """Return the branch to sync from (env-overridable, defaults to main)."""
    return os.environ.get("CMDFORGE_REGISTRY_REPO_BRANCH", "main")
def get_categories_cache_path() -> Path:
    """Return the file where the categories payload is cached as JSON."""
    default_path = Path.home() / ".cmdforge" / "registry" / "categories_cache.json"
    configured = os.environ.get("CMDFORGE_REGISTRY_CATEGORIES_CACHE", default_path)
    return Path(configured)
def verify_hmac(body: bytes, signature: str | None, secret: str) -> bool:
if not signature:
return False
expected = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
return hmac.compare_digest(signature, expected)
def clone_or_update_repo(repo_dir: Path) -> None:
    """Clone the registry repo into *repo_dir*, or hard-reset an existing clone.

    A fresh clone is shallow (--depth 1) on the configured branch. An existing
    checkout is fetched and hard-reset to origin so local state always mirrors
    the remote exactly (no merge conflicts possible).

    Raises:
        subprocess.CalledProcessError: if any git command fails (check=True).
    """
    repo_dir.parent.mkdir(parents=True, exist_ok=True)
    # Silence git on every command, not just clone (the original leaked
    # fetch/reset output into server logs) — keep webhook handling quiet.
    quiet = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
    if not repo_dir.exists():
        subprocess.run(
            ["git", "clone", "--depth", "1", "--branch", get_repo_branch(),
             get_repo_url(), str(repo_dir)],
            check=True,
            **quiet,
        )
        return
    subprocess.run(
        ["git", "-C", str(repo_dir), "fetch", "origin"],
        check=True,
        **quiet,
    )
    subprocess.run(
        ["git", "-C", str(repo_dir), "reset", "--hard", f"origin/{get_repo_branch()}"],
        check=True,
        **quiet,
    )
def load_yaml(path: Path) -> Dict[str, Any]:
    """Parse *path* as YAML; an empty or falsy document yields {}."""
    text = path.read_text(encoding="utf-8")
    return yaml.safe_load(text) or {}
def ensure_publisher(conn, owner: str) -> int:
    """Return the publisher id for *owner*, creating a stub row if missing.

    Auto-created publishers get a placeholder email, an empty password hash
    and verified=False; a real account can claim the slug later.

    Args:
        conn: open database connection (sqlite3-compatible).
        owner: publisher slug taken from the repo directory layout.

    Returns:
        The integer primary key of the publisher row.
    """
    row = query_one(conn, "SELECT id FROM publishers WHERE slug = ?", [owner])
    if row:
        return int(row["id"])
    placeholder_email = f"{owner}@registry.local"
    cursor = conn.execute(
        """
        INSERT INTO publishers (email, password_hash, slug, display_name, verified)
        VALUES (?, ?, ?, ?, ?)
        """,
        [placeholder_email, "", owner, owner, False],
    )
    # cursor.lastrowid gives the new id directly, avoiding the extra
    # `SELECT last_insert_rowid()` round-trip the original made.
    return int(cursor.lastrowid)
def normalize_tags(tags_value: Any) -> str:
    """Serialize a YAML `tags` field to a JSON array string.

    Accepts None/empty (-> "[]"), a list (dumped as-is), a tuple or set
    (generalization: previously a tuple was stringified into a single
    repr-like tag), or a single scalar (wrapped in a one-element array).
    Sets are sorted so the output is deterministic.
    """
    if not tags_value:
        return "[]"
    if isinstance(tags_value, list):
        return json.dumps(tags_value)
    if isinstance(tags_value, tuple):
        return json.dumps(list(tags_value))
    if isinstance(tags_value, set):
        # key=str tolerates mixed-type sets; sorting keeps output stable.
        return json.dumps(sorted(tags_value, key=str))
    return json.dumps([str(tags_value)])
def upsert_tool(conn, owner: str, name: str, data: Dict[str, Any], config_text: str, readme_text: str | None) -> None:
    """Insert or update one (owner, name, version) tool row from repo data.

    Args:
        conn: open database connection.
        owner: publisher slug (repo directory name, one level up).
        name: tool name (config.yaml's parent directory name).
        data: parsed config.yaml contents.
        config_text: raw config.yaml text, stored verbatim.
        readme_text: raw README.md text, or None if absent.

    Tools without a `version` field are silently skipped — a version is the
    row's identity. The caller is responsible for committing.
    """
    version = data.get("version")
    if not version:
        return
    publisher_id = ensure_publisher(conn, owner)
    # `or {}` also covers an explicit `registry: null` in the YAML.
    registry_meta = data.get("registry", {}) or {}
    tags = normalize_tags(data.get("tags"))
    description = data.get("description")
    category = data.get("category")
    deprecated = bool(data.get("deprecated", False))
    deprecated_message = data.get("deprecated_message")
    replacement = data.get("replacement")
    # Registry-maintained metadata; may be absent in the repo copy.
    downloads = registry_meta.get("downloads")
    published_at = registry_meta.get("published_at")
    existing = query_one(
        conn,
        "SELECT id FROM tools WHERE owner = ? AND name = ? AND version = ?",
        [owner, name, version],
    )
    if existing:
        # COALESCE keeps the DB's downloads/published_at when the repo
        # doesn't carry them, so stats aren't wiped by a plain sync.
        conn.execute(
            """
            UPDATE tools
            SET description = ?, category = ?, tags = ?, config_yaml = ?, readme = ?,
                deprecated = ?, deprecated_message = ?, replacement = ?,
                downloads = COALESCE(?, downloads), published_at = COALESCE(?, published_at)
            WHERE id = ?
            """,
            [
                description,
                category,
                tags,
                config_text,
                readme_text,
                int(deprecated),
                deprecated_message,
                replacement,
                downloads,
                published_at,
                existing["id"],
            ],
        )
    else:
        conn.execute(
            """
            INSERT INTO tools (
                owner, name, version, description, category, tags, config_yaml, readme,
                publisher_id, deprecated, deprecated_message, replacement, downloads, published_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            [
                owner,
                name,
                version,
                description,
                category,
                tags,
                config_text,
                readme_text,
                publisher_id,
                int(deprecated),
                deprecated_message,
                replacement,
                downloads or 0,
                published_at,
            ],
        )
def sync_categories(repo_dir: Path) -> None:
    """Cache categories/categories.yaml from the repo checkout as JSON.

    Quietly does nothing when the repo has no categories file.
    """
    source = repo_dir / "categories" / "categories.yaml"
    if not source.exists():
        return
    data = load_yaml(source)
    target = get_categories_cache_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(data, indent=2), encoding="utf-8")
def sync_collections(conn, repo_dir: Path) -> None:
    """Sync collections from registry repo to database.

    Reads every collections/*.yaml in the checkout and upserts a row per
    collection, keyed by name. Malformed files are skipped (best-effort:
    one bad collection must not abort the whole sync). Caller commits.
    """
    collections_dir = repo_dir / "collections"
    if not collections_dir.exists():
        return
    for collection_path in collections_dir.glob("*.yaml"):
        try:
            data = load_yaml(collection_path)
            # The filename stem is the fallback identity when `name` is absent.
            name = data.get("name") or collection_path.stem
            display_name = data.get("display_name") or name
            description = data.get("description")
            icon = data.get("icon")
            maintainer = data.get("maintainer") or "official"
            tools = data.get("tools") or []
            pinned = data.get("pinned") or {}
            tags = data.get("tags") or []
            # tools is always stored; pinned/tags are NULL when empty.
            tools_json = json.dumps(tools)
            pinned_json = json.dumps(pinned) if pinned else None
            tags_json = json.dumps(tags) if tags else None
            existing = query_one(conn, "SELECT id FROM collections WHERE name = ?", [name])
            if existing:
                conn.execute(
                    """
                    UPDATE collections
                    SET display_name = ?, description = ?, icon = ?, maintainer = ?,
                        tools = ?, pinned = ?, tags = ?, updated_at = ?
                    WHERE id = ?
                    """,
                    [display_name, description, icon, maintainer, tools_json,
                     pinned_json, tags_json, datetime.utcnow().isoformat(), existing["id"]],
                )
            else:
                # INSERT relies on the table's defaults for timestamps.
                conn.execute(
                    """
                    INSERT INTO collections (name, display_name, description, icon, maintainer, tools, pinned, tags)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    [name, display_name, description, icon, maintainer, tools_json, pinned_json, tags_json],
                )
        except Exception:
            # Deliberate best-effort: skip unparseable/invalid collections.
            continue
def sync_from_repo() -> Tuple[bool, str]:
    """Pull the registry repo and mirror its tools/collections into the DB.

    Returns:
        (True, "ok") on success, (False, "missing_tools_dir") when the
        checkout has no tools/ directory.

    Raises:
        subprocess.CalledProcessError: if the git clone/update fails.

    All tool and collection writes happen in one explicit transaction;
    closing the connection without commit rolls back on failure.
    """
    repo_dir = get_repo_dir()
    clone_or_update_repo(repo_dir)
    tools_root = repo_dir / "tools"
    if not tools_root.exists():
        return False, "missing_tools_dir"
    conn = connect_db()
    try:
        conn.execute("BEGIN")
        # Layout: tools/<owner>/<name>/config.yaml
        for config_path in tools_root.glob("*/*/config.yaml"):
            owner = config_path.parent.parent.name
            name = config_path.parent.name
            try:
                config_text = config_path.read_text(encoding="utf-8")
                data = yaml.safe_load(config_text) or {}
                readme_path = config_path.parent / "README.md"
                readme_text = readme_path.read_text(encoding="utf-8") if readme_path.exists() else None
                upsert_tool(conn, owner, name, data, config_text, readme_text)
            except Exception:
                # Best-effort: one broken tool must not abort the sync.
                continue
        sync_collections(conn, repo_dir)
        conn.commit()
    finally:
        conn.close()
    # Categories cache is a plain file, written outside the DB transaction.
    sync_categories(repo_dir)
    return True, "ok"
def record_webhook_delivery(conn, delivery_id: str, event_type: str) -> None:
    """Log a processed webhook delivery for idempotency bookkeeping."""
    processed_at = datetime.utcnow().isoformat()
    sql = "INSERT INTO webhook_log (delivery_id, event_type, processed_at) VALUES (?, ?, ?)"
    conn.execute(sql, [delivery_id, event_type, processed_at])
def is_delivery_processed(conn, delivery_id: str) -> bool:
    """Report whether this webhook delivery id was already handled."""
    hit = query_one(conn, "SELECT 1 FROM webhook_log WHERE delivery_id = ?", [delivery_id])
    return bool(hit)
def acquire_lock(lock_path: Path, timeout: int) -> bool:
    """Take an exclusive file lock, polling up to *timeout* seconds.

    O_CREAT|O_EXCL makes creation atomic, so exactly one process wins.
    Always makes at least one attempt — the original `while time.time() -
    start < timeout` skipped the attempt entirely for timeout <= 0, so a
    free lock could never be acquired with a zero timeout.

    Args:
        lock_path: lock file to create.
        timeout: seconds to keep retrying before giving up.

    Returns:
        True if the lock was acquired, False if it stayed held.
    """
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    deadline = time.time() + timeout
    while True:
        try:
            fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            if time.time() >= deadline:
                return False
            time.sleep(0.1)
        else:
            os.close(fd)
            return True
def release_lock(lock_path: Path) -> None:
    """Remove the lock file if it exists.

    Uses unlink(missing_ok=True) instead of the original exists()+unlink()
    pair, which had a TOCTOU race: another process could remove the file
    between the check and the delete, raising FileNotFoundError.
    """
    lock_path.unlink(missing_ok=True)
def process_webhook(body: bytes, headers: Dict[str, str], secret: str, timeout: int = 300) -> Tuple[int, Dict[str, Any]]:
    """Validate and process a Gitea push webhook, triggering a repo sync.

    Args:
        body: raw request body (needed byte-exact for HMAC verification).
        headers: request headers; X-Gitea-Delivery / -Signature / -Event used.
        secret: shared webhook secret for the HMAC check.
        timeout: seconds to wait for the sync lock before skipping.

    Returns:
        (http_status, json_payload) — 400 on missing delivery id, 401 on a
        bad signature, 200 for processed/duplicate/skipped, 500 on sync
        failure.
    """
    delivery_id = headers.get("X-Gitea-Delivery")
    signature = headers.get("X-Gitea-Signature")
    event_type = headers.get("X-Gitea-Event", "unknown")
    if not delivery_id:
        return 400, {"error": {"code": "VALIDATION_ERROR", "message": "Missing X-Gitea-Delivery"}}
    if not verify_hmac(body, signature, secret):
        return 401, {"error": {"code": "UNAUTHORIZED", "message": "Invalid webhook signature"}}
    conn = connect_db()
    try:
        # Cheap idempotency check before contending for the lock.
        if is_delivery_processed(conn, delivery_id):
            return 200, {"data": {"status": "already_processed"}}
        lock_path = Path.home() / ".cmdforge" / "registry" / "locks" / "webhook.lock"
        if not acquire_lock(lock_path, timeout):
            # Another sync is running; Gitea may redeliver later.
            return 200, {"data": {"status": "skipped", "reason": "sync_in_progress"}}
        try:
            # Re-check under the lock: a concurrent handler may have
            # processed this delivery while we waited.
            if is_delivery_processed(conn, delivery_id):
                return 200, {"data": {"status": "already_processed"}}
            ok, reason = sync_from_repo()
            if ok:
                # Record only on success so a failed sync can be retried.
                record_webhook_delivery(conn, delivery_id, event_type)
                conn.commit()
                return 200, {"data": {"status": "processed"}}
            return 500, {"error": {"code": "SERVER_ERROR", "message": f"Sync failed: {reason}"}}
        finally:
            release_lock(lock_path)
    finally:
        conn.close()