CmdForge/src/cmdforge/hash_utils.py

128 lines
3.8 KiB
Python

"""Content hash utilities for tool integrity verification.
This module provides consistent SHA256 hashing for tool content,
used for:
- Publish state tracking (detect local modifications)
- Download integrity verification
- Registry content verification
"""
import hashlib
from typing import Optional, Dict, Any
import yaml
def compute_config_hash(config: Dict[str, Any], exclude_fields: Optional[list] = None) -> str:
    """Return the SHA256 content hash of a tool configuration.

    The configuration is serialized to block-style YAML with sorted keys
    before hashing, so the result does not depend on the order in which
    keys were inserted into ``config``.

    Args:
        config: Tool configuration dictionary.
        exclude_fields: Top-level keys to leave out of the hash. When None,
            the registry/publication metadata keys listed in the body are
            excluded. An explicit empty list excludes nothing.

    Returns:
        Hash string in format "sha256:<64-char-hex>".
    """
    skipped = exclude_fields
    if skipped is None:
        # These keys are written during publish and are not tool content,
        # so they must not influence the content hash.
        skipped = [
            # registry metadata
            'published_hash', 'registry_hash', 'registry_status', 'registry_feedback',
            # publication metadata (added by publish dialog)
            'version', 'tags',
        ]

    # Only top-level keys are filtered; nested values are carried over as-is.
    hashable = {}
    for key, value in config.items():
        if key in skipped:
            continue
        hashable[key] = value

    # Canonical text form: sorted keys, block style.
    canonical_yaml = yaml.dump(hashable, sort_keys=True, default_flow_style=False)

    digest = hashlib.sha256(canonical_yaml.encode('utf-8'))
    return "sha256:" + digest.hexdigest()
def compute_yaml_hash(yaml_content: str, exclude_fields: Optional[list] = None) -> str:
    """Return the SHA256 content hash of a raw YAML document.

    The text is parsed with ``yaml.safe_load`` and the resulting object is
    handed to ``compute_config_hash``, so formatting differences in the raw
    text that do not change the parsed content do not change the hash.
    Useful for hashing raw config.yaml content.

    Args:
        yaml_content: Raw YAML string. A document that parses to ``None``
            (e.g. an empty string) is hashed as an empty mapping.
        exclude_fields: Fields to exclude from hashing; forwarded unchanged
            to ``compute_config_hash``.

    Returns:
        Hash string in format "sha256:<64-char-hex>".
    """
    parsed = yaml.safe_load(yaml_content)
    # safe_load yields None for an empty document; substitute an empty dict.
    return compute_config_hash({} if parsed is None else parsed, exclude_fields)
def compute_file_hash(file_path: str) -> str:
    """Compute SHA256 hash of a file's contents.

    Used for hashing pattern files (e.g., Fabric's system.md). The file is
    opened in binary mode and fed to the hasher in fixed-size chunks, so
    memory use stays bounded regardless of file size.

    Args:
        file_path: Path to file.

    Returns:
        Hash string in format "sha256:<64-char-hex>".

    Raises:
        OSError: If the file cannot be opened or read (propagated from
            ``open``/``read``; not caught here).
    """
    chunk_size = 65536
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # iter(callable, sentinel) keeps reading until read() returns b''
        # at end of file.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return f"sha256:{hasher.hexdigest()}"
def verify_hash(content: str, expected_hash: str) -> bool:
    """Check that YAML content hashes to the expected value.

    Args:
        content: Content to verify (YAML string).
        expected_hash: Expected hash in format "sha256:<hex>".

    Returns:
        True if the hash computed by ``compute_yaml_hash(content)`` equals
        ``expected_hash``. False if they differ, or if ``expected_hash`` is
        empty/None or lacks the "sha256:" prefix (in which case ``content``
        is not hashed at all).

    Note:
        Errors raised while hashing ``content`` are not caught here and
        propagate to the caller.
    """
    if expected_hash and expected_hash.startswith("sha256:"):
        return compute_yaml_hash(content) == expected_hash
    # Missing or unrecognized expected hash can never match.
    return False
def extract_hash_value(hash_string: str) -> Optional[str]:
    """Strip the "sha256:" prefix from a hash string.

    Args:
        hash_string: Hash in format "sha256:<hex>".

    Returns:
        Everything after the "sha256:" prefix, or None when the input is
        empty/None or does not start with that prefix. The remainder is
        returned as-is; it is not checked for length or hex characters.
    """
    prefix = "sha256:"
    if hash_string and hash_string.startswith(prefix):
        return hash_string[len(prefix):]
    return None
def short_hash(hash_string: str, length: int = 8) -> str:
    """Return an abbreviated form of a hash for display.

    Args:
        hash_string: Full hash string.
        length: Number of hex chars to include.

    Returns:
        For a "sha256:<hex>" input with a non-empty hex part, the prefix
        plus the first ``length`` hex chars and a trailing "..."
        (e.g., "sha256:abc123..."). For any other input, the first
        ``length + 10`` characters of the raw string followed by "...".
        The "..." is appended in both cases, whether or not anything was
        actually cut off.
    """
    digest = extract_hash_value(hash_string)
    if not digest:
        # No usable hex part (missing prefix or empty digest): clip the raw
        # string instead.
        return hash_string[:length + 10] + "..."
    return "sha256:" + digest[:length] + "..."