"""Registry repository sync and webhook processing.""" from __future__ import annotations import hashlib import hmac import json import os import shutil import subprocess import time from datetime import datetime from pathlib import Path from typing import Any, Dict, Iterable, Tuple import yaml from .db import connect_db, query_one def get_repo_dir() -> Path: default_dir = Path.home() / ".cmdforge" / "registry" / "repo" return Path(os.environ.get("CMDFORGE_REGISTRY_REPO_DIR", default_dir)) def get_repo_url() -> str: return os.environ.get("CMDFORGE_REGISTRY_REPO_URL", "https://gitea.brrd.tech/rob/CmdForge-Registry.git") def get_repo_branch() -> str: return os.environ.get("CMDFORGE_REGISTRY_REPO_BRANCH", "main") def get_categories_cache_path() -> Path: return Path(os.environ.get( "CMDFORGE_REGISTRY_CATEGORIES_CACHE", Path.home() / ".cmdforge" / "registry" / "categories_cache.json", )) def verify_hmac(body: bytes, signature: str | None, secret: str) -> bool: if not signature: return False expected = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest() return hmac.compare_digest(signature, expected) def clone_or_update_repo(repo_dir: Path) -> None: repo_dir.parent.mkdir(parents=True, exist_ok=True) if not repo_dir.exists(): subprocess.run( ["git", "clone", "--depth", "1", "--branch", get_repo_branch(), get_repo_url(), str(repo_dir)], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) return subprocess.run(["git", "-C", str(repo_dir), "fetch", "origin"], check=True) subprocess.run( ["git", "-C", str(repo_dir), "reset", "--hard", f"origin/{get_repo_branch()}"] , check=True ) def load_yaml(path: Path) -> Dict[str, Any]: with path.open("r", encoding="utf-8") as handle: return yaml.safe_load(handle) or {} def ensure_publisher(conn, owner: str) -> int: row = query_one(conn, "SELECT id FROM publishers WHERE slug = ?", [owner]) if row: return int(row["id"]) placeholder_email = f"{owner}@registry.local" conn.execute( """ INSERT INTO publishers (email, password_hash, slug, display_name, verified) VALUES (?, ?, ?, ?, ?) """, [placeholder_email, "", owner, owner, False], ) return int(conn.execute("SELECT last_insert_rowid() AS id").fetchone()["id"]) def normalize_tags(tags_value: Any) -> str: if not tags_value: return "[]" if isinstance(tags_value, list): return json.dumps(tags_value) return json.dumps([str(tags_value)]) def upsert_tool(conn, owner: str, name: str, data: Dict[str, Any], config_text: str, readme_text: str | None) -> None: version = data.get("version") if not version: return publisher_id = ensure_publisher(conn, owner) registry_meta = data.get("registry", {}) or {} tags = normalize_tags(data.get("tags")) description = data.get("description") category = data.get("category") deprecated = bool(data.get("deprecated", False)) deprecated_message = data.get("deprecated_message") replacement = data.get("replacement") downloads = registry_meta.get("downloads") published_at = registry_meta.get("published_at") existing = query_one( conn, "SELECT id FROM tools WHERE owner = ? AND name = ? AND version = ?", [owner, name, version], ) if existing: conn.execute( """ UPDATE tools SET description = ?, category = ?, tags = ?, config_yaml = ?, readme = ?, deprecated = ?, deprecated_message = ?, replacement = ?, downloads = COALESCE(?, downloads), published_at = COALESCE(?, published_at) WHERE id = ? """, [ description, category, tags, config_text, readme_text, int(deprecated), deprecated_message, replacement, downloads, published_at, existing["id"], ], ) else: conn.execute( """ INSERT INTO tools ( owner, name, version, description, category, tags, config_yaml, readme, publisher_id, deprecated, deprecated_message, replacement, downloads, published_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, [ owner, name, version, description, category, tags, config_text, readme_text, publisher_id, int(deprecated), deprecated_message, replacement, downloads or 0, published_at, ], ) def sync_categories(repo_dir: Path) -> None: categories_path = repo_dir / "categories" / "categories.yaml" if not categories_path.exists(): return payload = load_yaml(categories_path) cache_path = get_categories_cache_path() cache_path.parent.mkdir(parents=True, exist_ok=True) cache_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") def sync_collections(conn, repo_dir: Path) -> None: """Sync collections from registry repo to database.""" collections_dir = repo_dir / "collections" if not collections_dir.exists(): return for collection_path in collections_dir.glob("*.yaml"): try: data = load_yaml(collection_path) name = data.get("name") or collection_path.stem display_name = data.get("display_name") or name description = data.get("description") icon = data.get("icon") maintainer = data.get("maintainer") or "official" tools = data.get("tools") or [] pinned = data.get("pinned") or {} tags = data.get("tags") or [] tools_json = json.dumps(tools) pinned_json = json.dumps(pinned) if pinned else None tags_json = json.dumps(tags) if tags else None existing = query_one(conn, "SELECT id FROM collections WHERE name = ?", [name]) if existing: conn.execute( """ UPDATE collections SET display_name = ?, description = ?, icon = ?, maintainer = ?, tools = ?, pinned = ?, tags = ?, updated_at = ? WHERE id = ? """, [display_name, description, icon, maintainer, tools_json, pinned_json, tags_json, datetime.utcnow().isoformat(), existing["id"]], ) else: conn.execute( """ INSERT INTO collections (name, display_name, description, icon, maintainer, tools, pinned, tags) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, [name, display_name, description, icon, maintainer, tools_json, pinned_json, tags_json], ) except Exception: continue def sync_from_repo() -> Tuple[bool, str]: repo_dir = get_repo_dir() clone_or_update_repo(repo_dir) tools_root = repo_dir / "tools" if not tools_root.exists(): return False, "missing_tools_dir" conn = connect_db() try: conn.execute("BEGIN") for config_path in tools_root.glob("*/*/config.yaml"): owner = config_path.parent.parent.name name = config_path.parent.name try: config_text = config_path.read_text(encoding="utf-8") data = yaml.safe_load(config_text) or {} readme_path = config_path.parent / "README.md" readme_text = readme_path.read_text(encoding="utf-8") if readme_path.exists() else None upsert_tool(conn, owner, name, data, config_text, readme_text) except Exception: continue sync_collections(conn, repo_dir) conn.commit() finally: conn.close() sync_categories(repo_dir) return True, "ok" def record_webhook_delivery(conn, delivery_id: str, event_type: str) -> None: conn.execute( "INSERT INTO webhook_log (delivery_id, event_type, processed_at) VALUES (?, ?, ?)", [delivery_id, event_type, datetime.utcnow().isoformat()], ) def is_delivery_processed(conn, delivery_id: str) -> bool: row = query_one(conn, "SELECT 1 FROM webhook_log WHERE delivery_id = ?", [delivery_id]) return bool(row) def acquire_lock(lock_path: Path, timeout: int) -> bool: lock_path.parent.mkdir(parents=True, exist_ok=True) start = time.time() while time.time() - start < timeout: try: fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) os.close(fd) return True except FileExistsError: time.sleep(0.1) return False def release_lock(lock_path: Path) -> None: if lock_path.exists(): lock_path.unlink() def process_webhook(body: bytes, headers: Dict[str, str], secret: str, timeout: int = 300) -> Tuple[int, Dict[str, Any]]: delivery_id = headers.get("X-Gitea-Delivery") signature = headers.get("X-Gitea-Signature") event_type = headers.get("X-Gitea-Event", "unknown") if not delivery_id: return 400, {"error": {"code": "VALIDATION_ERROR", "message": "Missing X-Gitea-Delivery"}} if not verify_hmac(body, signature, secret): return 401, {"error": {"code": "UNAUTHORIZED", "message": "Invalid webhook signature"}} conn = connect_db() try: if is_delivery_processed(conn, delivery_id): return 200, {"data": {"status": "already_processed"}} lock_path = Path.home() / ".cmdforge" / "registry" / "locks" / "webhook.lock" if not acquire_lock(lock_path, timeout): return 200, {"data": {"status": "skipped", "reason": "sync_in_progress"}} try: if is_delivery_processed(conn, delivery_id): return 200, {"data": {"status": "already_processed"}} ok, reason = sync_from_repo() if ok: record_webhook_delivery(conn, delivery_id, event_type) conn.commit() return 200, {"data": {"status": "processed"}} return 500, {"error": {"code": "SERVER_ERROR", "message": f"Sync failed: {reason}"}} finally: release_lock(lock_path) finally: conn.close()