316 lines
10 KiB
Python
316 lines
10 KiB
Python
"""Registry repository sync and webhook processing."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import hmac
|
|
import json
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import time
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Iterable, Tuple
|
|
|
|
import yaml
|
|
|
|
from .db import connect_db, query_one
|
|
|
|
|
|
def get_repo_dir() -> Path:
    """Return the local checkout directory for the registry repository.

    Honours the CMDFORGE_REGISTRY_REPO_DIR environment override and falls
    back to ~/.cmdforge/registry/repo otherwise.
    """
    fallback = Path.home() / ".cmdforge" / "registry" / "repo"
    return Path(os.environ.get("CMDFORGE_REGISTRY_REPO_DIR", fallback))
|
|
|
|
|
|
def get_repo_url() -> str:
    """Return the registry git remote URL (env-overridable)."""
    default_url = "https://gitea.brrd.tech/rob/CmdForge-Registry.git"
    return os.environ.get("CMDFORGE_REGISTRY_REPO_URL", default_url)
|
|
|
|
|
|
def get_repo_branch() -> str:
    """Return the registry branch to track (env-overridable, default 'main')."""
    return os.environ.get("CMDFORGE_REGISTRY_REPO_BRANCH", "main")
|
|
|
|
|
|
def get_categories_cache_path() -> Path:
    """Return where the categories payload is cached locally as JSON."""
    default_cache = Path.home() / ".cmdforge" / "registry" / "categories_cache.json"
    return Path(os.environ.get("CMDFORGE_REGISTRY_CATEGORIES_CACHE", default_cache))
|
|
|
|
|
|
def verify_hmac(body: bytes, signature: str | None, secret: str) -> bool:
|
|
if not signature:
|
|
return False
|
|
expected = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
|
|
return hmac.compare_digest(signature, expected)
|
|
|
|
|
|
def clone_or_update_repo(repo_dir: Path) -> None:
    """Clone the registry repo if absent; otherwise hard-reset it to origin.

    A fresh checkout uses a shallow clone of the configured branch. An
    existing checkout is fetched and reset --hard to origin/<branch>, which
    discards any local modifications by design (the checkout is a mirror).

    Raises subprocess.CalledProcessError when any git command fails.
    """
    repo_dir.parent.mkdir(parents=True, exist_ok=True)
    if not repo_dir.exists():
        subprocess.run(
            ["git", "clone", "--depth", "1", "--branch", get_repo_branch(), get_repo_url(), str(repo_dir)],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        return

    # Fix: fetch/reset were the only git calls whose output was not
    # suppressed, so webhook-triggered syncs leaked git progress noise into
    # server logs. Silence them the same way the clone is silenced.
    subprocess.run(
        ["git", "-C", str(repo_dir), "fetch", "origin"],
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    subprocess.run(
        ["git", "-C", str(repo_dir), "reset", "--hard", f"origin/{get_repo_branch()}"],
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
|
|
|
|
|
|
def load_yaml(path: Path) -> Dict[str, Any]:
    """Parse a YAML file with safe_load, mapping an empty document to {}."""
    with path.open("r", encoding="utf-8") as fh:
        parsed = yaml.safe_load(fh)
    return parsed or {}
|
|
|
|
|
|
def ensure_publisher(conn, owner: str) -> int:
    """Return the publisher id for *owner*, creating a placeholder if absent.

    The placeholder row gets a synthetic ``<owner>@registry.local`` email,
    an empty password hash, and verified=False; real account details are
    expected to be filled in elsewhere.
    """
    existing = query_one(conn, "SELECT id FROM publishers WHERE slug = ?", [owner])
    if existing:
        return int(existing["id"])

    conn.execute(
        """
        INSERT INTO publishers (email, password_hash, slug, display_name, verified)
        VALUES (?, ?, ?, ?, ?)
        """,
        [f"{owner}@registry.local", "", owner, owner, False],
    )
    created = conn.execute("SELECT last_insert_rowid() AS id").fetchone()
    return int(created["id"])
|
|
|
|
|
|
def normalize_tags(tags_value: Any) -> str:
    """Serialize a config ``tags`` value to a JSON array string.

    Falsy values become "[]"; a list is encoded as-is; any other scalar is
    stringified and wrapped in a one-element array.
    """
    if not tags_value:
        return "[]"
    items = tags_value if isinstance(tags_value, list) else [str(tags_value)]
    return json.dumps(items)
|
|
|
|
|
|
def upsert_tool(conn, owner: str, name: str, data: Dict[str, Any], config_text: str, readme_text: str | None) -> None:
    """Insert or update one tool version row from its parsed config.

    Keys rows on (owner, name, version); a config with no version is
    skipped. On update, downloads and published_at are only overwritten
    when the repo provides them (COALESCE keeps the stored values).
    """
    version = data.get("version")
    if not version:
        # No version means the row cannot be keyed; nothing to do.
        return

    publisher_id = ensure_publisher(conn, owner)
    meta = data.get("registry", {}) or {}

    fields = {
        "description": data.get("description"),
        "category": data.get("category"),
        "tags": normalize_tags(data.get("tags")),
        "deprecated": int(bool(data.get("deprecated", False))),
        "deprecated_message": data.get("deprecated_message"),
        "replacement": data.get("replacement"),
        "downloads": meta.get("downloads"),
        "published_at": meta.get("published_at"),
    }

    existing = query_one(
        conn,
        "SELECT id FROM tools WHERE owner = ? AND name = ? AND version = ?",
        [owner, name, version],
    )

    if existing:
        conn.execute(
            """
            UPDATE tools
            SET description = ?, category = ?, tags = ?, config_yaml = ?, readme = ?,
                deprecated = ?, deprecated_message = ?, replacement = ?,
                downloads = COALESCE(?, downloads), published_at = COALESCE(?, published_at)
            WHERE id = ?
            """,
            [
                fields["description"],
                fields["category"],
                fields["tags"],
                config_text,
                readme_text,
                fields["deprecated"],
                fields["deprecated_message"],
                fields["replacement"],
                fields["downloads"],
                fields["published_at"],
                existing["id"],
            ],
        )
    else:
        conn.execute(
            """
            INSERT INTO tools (
                owner, name, version, description, category, tags, config_yaml, readme,
                publisher_id, deprecated, deprecated_message, replacement, downloads, published_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            [
                owner,
                name,
                version,
                fields["description"],
                fields["category"],
                fields["tags"],
                config_text,
                readme_text,
                publisher_id,
                fields["deprecated"],
                fields["deprecated_message"],
                fields["replacement"],
                fields["downloads"] or 0,
                fields["published_at"],
            ],
        )
|
|
|
|
|
|
def sync_categories(repo_dir: Path) -> None:
    """Mirror categories/categories.yaml from the repo into the JSON cache.

    Silently returns if the repo has no categories file; otherwise writes
    the parsed payload as pretty-printed JSON to the cache path.
    """
    source = repo_dir / "categories" / "categories.yaml"
    if not source.exists():
        return
    payload = load_yaml(source)
    cache_path = get_categories_cache_path()
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    cache_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
|
|
|
|
|
|
def sync_collections(conn, repo_dir: Path) -> None:
    """Sync collections from registry repo to database.

    Best-effort: each *.yaml file under collections/ is upserted by name;
    a malformed file is skipped rather than aborting the whole sync.
    """
    source_dir = repo_dir / "collections"
    if not source_dir.exists():
        return

    for doc_path in source_dir.glob("*.yaml"):
        try:
            doc = load_yaml(doc_path)
            coll_name = doc.get("name") or doc_path.stem
            display = doc.get("display_name") or coll_name
            desc = doc.get("description")
            icon_value = doc.get("icon")
            owner = doc.get("maintainer") or "official"
            pinned_map = doc.get("pinned") or {}
            tag_list = doc.get("tags") or []

            tools_json = json.dumps(doc.get("tools") or [])
            pinned_json = json.dumps(pinned_map) if pinned_map else None
            tags_json = json.dumps(tag_list) if tag_list else None

            existing = query_one(conn, "SELECT id FROM collections WHERE name = ?", [coll_name])
            if existing:
                conn.execute(
                    """
                    UPDATE collections
                    SET display_name = ?, description = ?, icon = ?, maintainer = ?,
                        tools = ?, pinned = ?, tags = ?, updated_at = ?
                    WHERE id = ?
                    """,
                    [display, desc, icon_value, owner, tools_json,
                     pinned_json, tags_json, datetime.utcnow().isoformat(), existing["id"]],
                )
            else:
                conn.execute(
                    """
                    INSERT INTO collections (name, display_name, description, icon, maintainer, tools, pinned, tags)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    [coll_name, display, desc, icon_value, owner, tools_json, pinned_json, tags_json],
                )
        except Exception:
            # Deliberate best-effort: one broken collection file must not
            # block the others from syncing.
            continue
|
|
|
|
|
|
def sync_from_repo() -> Tuple[bool, str]:
    """Pull the registry repo and sync tools, collections, and categories.

    Returns (ok, reason): (True, "ok") on success, (False, "git_error")
    when the clone/fetch fails, or (False, "missing_tools_dir") when the
    checkout has no tools/ directory. Individual broken tool configs are
    skipped so one bad file cannot block the rest of the sync.
    """
    repo_dir = get_repo_dir()
    try:
        clone_or_update_repo(repo_dir)
    except subprocess.CalledProcessError:
        # Fix: a git failure previously escaped as an unhandled exception,
        # breaking this function's (bool, reason) contract and surfacing a
        # traceback to the webhook caller. Report it as a sync failure.
        return False, "git_error"

    tools_root = repo_dir / "tools"
    if not tools_root.exists():
        return False, "missing_tools_dir"

    conn = connect_db()
    try:
        conn.execute("BEGIN")
        # Layout is tools/<owner>/<name>/config.yaml.
        for config_path in tools_root.glob("*/*/config.yaml"):
            owner = config_path.parent.parent.name
            name = config_path.parent.name
            try:
                config_text = config_path.read_text(encoding="utf-8")
                data = yaml.safe_load(config_text) or {}
                readme_path = config_path.parent / "README.md"
                readme_text = readme_path.read_text(encoding="utf-8") if readme_path.exists() else None
                upsert_tool(conn, owner, name, data, config_text, readme_text)
            except Exception:
                # Best-effort per tool: skip malformed configs.
                continue
        sync_collections(conn, repo_dir)
        conn.commit()
    finally:
        conn.close()

    sync_categories(repo_dir)
    return True, "ok"
|
|
|
|
|
|
def record_webhook_delivery(conn, delivery_id: str, event_type: str) -> None:
    """Record a processed webhook delivery in the idempotency ledger.

    Inserts (delivery_id, event_type, processed_at) into webhook_log so
    later duplicate deliveries can be detected and skipped.
    """
    processed_at = datetime.utcnow().isoformat()
    conn.execute(
        "INSERT INTO webhook_log (delivery_id, event_type, processed_at) VALUES (?, ?, ?)",
        [delivery_id, event_type, processed_at],
    )
|
|
|
|
|
|
def is_delivery_processed(conn, delivery_id: str) -> bool:
    """Return True when *delivery_id* already appears in webhook_log."""
    match = query_one(conn, "SELECT 1 FROM webhook_log WHERE delivery_id = ?", [delivery_id])
    return bool(match)
|
|
|
|
|
|
def acquire_lock(lock_path: Path, timeout: int) -> bool:
    """Try to take an exclusive lock file, polling up to *timeout* seconds.

    Uses O_CREAT | O_EXCL so creation is atomic; on contention, retries
    every 0.1s until the deadline. Returns True on success, False when the
    lock could not be taken in time (note: timeout <= 0 never attempts).
    """
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            handle = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            time.sleep(0.1)
        else:
            os.close(handle)
            return True
    return False
|
|
|
|
|
|
def release_lock(lock_path: Path) -> None:
    """Remove the lock file, tolerating its absence.

    Fix: the previous exists()/unlink() pair was racy (TOCTOU) — another
    process could remove the file between the check and the unlink and
    trigger FileNotFoundError. unlink(missing_ok=True) does the same thing
    atomically from this process's point of view.
    """
    lock_path.unlink(missing_ok=True)
|
|
|
|
|
|
def process_webhook(body: bytes, headers: Dict[str, str], secret: str, timeout: int = 300) -> Tuple[int, Dict[str, Any]]:
    """Validate, deduplicate, and process one Gitea webhook delivery.

    Returns an (http_status, json_payload) pair. Flow:
      1. reject requests missing the delivery id (400) or failing the
         HMAC signature check (401);
      2. skip deliveries already recorded in webhook_log (idempotency);
      3. take a filesystem lock so only one sync runs at a time — if the
         lock is busy, report "skipped" with a 200 rather than blocking;
      4. re-check idempotency under the lock, run the repo sync, and
         record the delivery only when the sync succeeded (so failed
         deliveries can be retried by the sender).
    """
    delivery_id = headers.get("X-Gitea-Delivery")
    signature = headers.get("X-Gitea-Signature")
    event_type = headers.get("X-Gitea-Event", "unknown")

    if not delivery_id:
        return 400, {"error": {"code": "VALIDATION_ERROR", "message": "Missing X-Gitea-Delivery"}}

    if not verify_hmac(body, signature, secret):
        return 401, {"error": {"code": "UNAUTHORIZED", "message": "Invalid webhook signature"}}

    conn = connect_db()
    try:
        # Fast path: drop duplicate deliveries before contending for the lock.
        if is_delivery_processed(conn, delivery_id):
            return 200, {"data": {"status": "already_processed"}}

        lock_path = Path.home() / ".cmdforge" / "registry" / "locks" / "webhook.lock"
        if not acquire_lock(lock_path, timeout):
            # Another sync holds the lock past the timeout; report skipped
            # instead of failing the delivery.
            return 200, {"data": {"status": "skipped", "reason": "sync_in_progress"}}

        try:
            # Double-check under the lock: another worker may have processed
            # this same delivery while we were waiting to acquire it.
            if is_delivery_processed(conn, delivery_id):
                return 200, {"data": {"status": "already_processed"}}
            ok, reason = sync_from_repo()
            if ok:
                # Record only successful syncs so a failed sync stays
                # retryable under the same delivery id.
                record_webhook_delivery(conn, delivery_id, event_type)
                conn.commit()
                return 200, {"data": {"status": "processed"}}
            return 500, {"error": {"code": "SERVER_ERROR", "message": f"Sync failed: {reason}"}}
        finally:
            release_lock(lock_path)
    finally:
        conn.close()
|