From 9be0ab4551f0c6187a713fd20ef3de6f18128879 Mon Sep 17 00:00:00 2001
From: rob
Date: Mon, 26 Jan 2026 23:10:27 -0400
Subject: [PATCH] Fix hash verification for imported tools

- Add config_hash computation to sync.py when importing tools from repo
- Add migrate_hashes.py script to recompute all hashes in the database

The hash mismatch bug was caused by:
1. Tools imported via sync.py never had config_hash computed
2. The exclude_fields list changed over time, causing old hashes to mismatch

Co-Authored-By: Claude Opus 4.5
---
Review notes (reviewer commentary, not part of the commit message):

* Layout: the line structure of this patch was restored from a
  whitespace-collapsed copy. Line counts agree with every hunk header and
  with the diffstat; indentation widths (and the line split inside the
  UPDATE ... SET context) follow convention and should be confirmed
  against the repository before applying.
* migrate_hashes.py: rows with an empty config_yaml are printed as SKIP
  but tallied in `errors`, so the "Errors" figure in the summary also
  includes skipped rows.
* migrate_hashes.py: with --dry-run the `updated` counter is still
  incremented and printed under "Updated", although no UPDATE is issued.
* migrate_hashes.py: conn.commit() runs once, after the loop; an exception
  that escapes the loop reaches conn.close() in the `finally` clause
  without a commit. NOTE(review): presumably the partial updates are then
  discarded - confirm against connect_db().
* sync.py: the migration wraps compute_yaml_hash() in try/except, while
  the upsert_tool() hunk calls it unguarded. NOTE(review): confirm how the
  callers of upsert_tool() handle a hashing failure.

 src/cmdforge/registry/migrate_hashes.py | 102 ++++++++++++++++++++++++
 src/cmdforge/registry/sync.py           |  14 +++-
 2 files changed, 113 insertions(+), 3 deletions(-)
 create mode 100644 src/cmdforge/registry/migrate_hashes.py

diff --git a/src/cmdforge/registry/migrate_hashes.py b/src/cmdforge/registry/migrate_hashes.py
new file mode 100644
index 0000000..284e69b
--- /dev/null
+++ b/src/cmdforge/registry/migrate_hashes.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""One-time migration to recompute all config_hash values.
+
+Run this on the server after deploying the fix to sync.py:
+    python -m cmdforge.registry.migrate_hashes
+
+This will:
+1. Load all tools from the database
+2. Recompute config_hash using the current (correct) algorithm
+3. Update the database with the new hashes
+"""
+
+import sys
+from pathlib import Path
+
+# Ensure the package is importable
+sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
+
+from cmdforge.registry.db import connect_db, query_all
+from cmdforge.hash_utils import compute_yaml_hash
+
+
+def migrate_hashes(dry_run: bool = False) -> None:
+    """Recompute and update all config_hash values in the database."""
+    conn = connect_db()
+
+    try:
+        # Get all tools
+        rows = query_all(conn, "SELECT id, owner, name, version, config_yaml, config_hash FROM tools")
+
+        print(f"Found {len(rows)} tools to process")
+
+        updated = 0
+        unchanged = 0
+        errors = 0
+
+        for row in rows:
+            tool_id = row["id"]
+            owner = row["owner"]
+            name = row["name"]
+            version = row["version"]
+            config_yaml = row["config_yaml"]
+            old_hash = row["config_hash"] or ""
+
+            if not config_yaml:
+                print(f" SKIP {owner}/{name}@{version}: no config_yaml")
+                errors += 1
+                continue
+
+            try:
+                new_hash = compute_yaml_hash(config_yaml)
+            except Exception as e:
+                print(f" ERROR {owner}/{name}@{version}: {e}")
+                errors += 1
+                continue
+
+            if new_hash == old_hash:
+                unchanged += 1
+                continue
+
+            if dry_run:
+                print(f" WOULD UPDATE {owner}/{name}@{version}")
+                print(f" old: {old_hash[:30]}..." if old_hash else " old: (none)")
+                print(f" new: {new_hash[:30]}...")
+                updated += 1
+            else:
+                conn.execute(
+                    "UPDATE tools SET config_hash = ? WHERE id = ?",
+                    [new_hash, tool_id]
+                )
+                updated += 1
+                if updated % 100 == 0:
+                    print(f" Updated {updated} tools...")
+
+        if not dry_run:
+            conn.commit()
+            print(f"\nCommitted changes to database")
+
+        print(f"\nSummary:")
+        print(f" Updated: {updated}")
+        print(f" Unchanged: {unchanged}")
+        print(f" Errors: {errors}")
+        print(f" Total: {len(rows)}")
+
+    finally:
+        conn.close()
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="Migrate config_hash values")
+    parser.add_argument("--dry-run", action="store_true", help="Show what would be updated without making changes")
+    args = parser.parse_args()
+
+    if args.dry_run:
+        print("DRY RUN - no changes will be made\n")
+
+    migrate_hashes(dry_run=args.dry_run)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/cmdforge/registry/sync.py b/src/cmdforge/registry/sync.py
index 8f739c2..8ac14c5 100644
--- a/src/cmdforge/registry/sync.py
+++ b/src/cmdforge/registry/sync.py
@@ -16,6 +16,7 @@ from typing import Any, Dict, Iterable, Tuple
 import yaml
 
 from .db import connect_db, query_one
+from ..hash_utils import compute_yaml_hash
 
 
 def get_repo_dir() -> Path:
@@ -107,6 +108,9 @@ def upsert_tool(conn, owner: str, name: str, data: Dict[str, Any], config_text:
     downloads = registry_meta.get("downloads")
     published_at = registry_meta.get("published_at")
 
+    # Compute content hash for integrity verification
+    config_hash = compute_yaml_hash(config_text)
+
     existing = query_one(
         conn,
         "SELECT id FROM tools WHERE owner = ? AND name = ? AND version = ?",
@@ -119,7 +123,8 @@ def upsert_tool(conn, owner: str, name: str, data: Dict[str, Any], config_text:
             UPDATE tools
             SET description = ?, category = ?, tags = ?, config_yaml = ?, readme = ?,
                 deprecated = ?, deprecated_message = ?, replacement = ?,
-                downloads = COALESCE(?, downloads), published_at = COALESCE(?, published_at)
+                downloads = COALESCE(?, downloads), published_at = COALESCE(?, published_at),
+                config_hash = ?
             WHERE id = ?
             """,
             [
@@ -133,6 +138,7 @@ def upsert_tool(conn, owner: str, name: str, data: Dict[str, Any], config_text:
                 replacement,
                 downloads,
                 published_at,
+                config_hash,
                 existing["id"],
             ],
         )
@@ -141,8 +147,9 @@ def upsert_tool(conn, owner: str, name: str, data: Dict[str, Any], config_text:
             """
             INSERT INTO tools (
                 owner, name, version, description, category, tags, config_yaml, readme,
-                publisher_id, deprecated, deprecated_message, replacement, downloads, published_at
-            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                publisher_id, deprecated, deprecated_message, replacement, downloads, published_at,
+                config_hash
+            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             """,
             [
                 owner,
@@ -159,6 +166,7 @@ def upsert_tool(conn, owner: str, name: str, data: Dict[str, Any], config_text:
                 replacement,
                 downloads or 0,
                 published_at,
+                config_hash,
             ],
         )
 