CmdForge/scripts/scrutiny.py

#!/usr/bin/env python3
"""
Tool vetting/scrutiny module for CmdForge.
Performs AI-powered analysis of tools to assess quality and safety:
- Honesty: Does the tool do what it claims?
- Transparency: Is the prompt clear and understandable?
- Scope: Is the tool appropriately scoped?
- Efficiency: Is the prompt well-structured?
- Safety: Are there any concerning patterns?
Usage:
# Vet a single tool
python scripts/scrutiny.py path/to/tool/config.yaml
# Vet all tools in directory
python scripts/scrutiny.py --all ~/.cmdforge/
# Output as JSON
python scripts/scrutiny.py --json path/to/tool/config.yaml
# Use specific provider for analysis
python scripts/scrutiny.py --provider claude path/to/tool/config.yaml
"""
import argparse
import json
import sys
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional
import yaml


class VetResult(Enum):
    """Vetting decision."""
    APPROVE = "approve"  # Auto-approve - meets all criteria
    REVIEW = "review"    # Needs human review - some concerns
    REJECT = "reject"    # Auto-reject - fails criteria
    ERROR = "error"      # Could not vet


@dataclass
class VetScore:
    """Individual score for a vetting criterion."""
    criterion: str
    score: float  # 0.0 to 1.0
    max_score: float = 1.0
    notes: str = ""
    concerns: list[str] = field(default_factory=list)


@dataclass
class VetReport:
    """Complete vetting report for a tool."""
    tool_name: str
    tool_path: str
    result: VetResult
    overall_score: float  # 0.0 to 1.0
    scores: list[VetScore] = field(default_factory=list)
    suggestions: list[str] = field(default_factory=list)
    error: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        d = asdict(self)
        d['result'] = self.result.value
        return d


# Thresholds for auto-approve/reject
APPROVE_THRESHOLD = 0.8  # Score >= 0.8 -> auto-approve
REJECT_THRESHOLD = 0.3   # Score < 0.3 -> auto-reject


def load_tool_config(path: Path) -> Optional[dict]:
    """Load tool configuration from YAML file."""
    if path.is_dir():
        config_file = path / "config.yaml"
    else:
        config_file = path

    if not config_file.exists():
        return None

    with open(config_file) as f:
        return yaml.safe_load(f)


def vet_honesty(config: dict) -> VetScore:
    """Check if tool description matches what it actually does."""
    score = VetScore(criterion="honesty", score=0.0, notes="")
    concerns = []

    name = config.get("name", "")
    description = config.get("description", "")
    steps = config.get("steps", [])

    # Check that description exists
    if not description:
        concerns.append("Missing description")
        score.score = 0.3
    else:
        score.score = 0.6

    # Check that steps exist
    if not steps:
        concerns.append("No execution steps defined")
        score.score = min(score.score, 0.2)
    else:
        # Check if description keywords appear in prompts
        desc_words = set(description.lower().split())
        prompt_text = ""
        for step in steps:
            if step.get("type") == "prompt":
                prompt_text += step.get("prompt", "").lower() + " "

        # Simple keyword overlap check
        prompt_words = set(prompt_text.split())
        overlap = desc_words & prompt_words
        meaningful_overlap = overlap - {"the", "a", "an", "and", "or", "is", "to", "for", "of", "in"}
        if len(meaningful_overlap) >= 2:
            score.score = min(1.0, score.score + 0.3)
            score.notes = f"Description matches prompt content ({len(meaningful_overlap)} keywords)"
        else:
            concerns.append("Description may not match actual behavior")

    score.concerns = concerns
    return score


def vet_transparency(config: dict) -> VetScore:
    """Check if the tool's behavior is clear and understandable."""
    score = VetScore(criterion="transparency", score=0.0, notes="")
    concerns = []

    steps = config.get("steps", [])
    if not steps:
        concerns.append("No steps to analyze")
        score.concerns = concerns
        return score

    # Analyze each step
    total_prompt_length = 0
    has_clear_instructions = False

    for step in steps:
        if step.get("type") == "prompt":
            prompt = step.get("prompt", "")
            total_prompt_length += len(prompt)

            # Check for clear instruction patterns
            instruction_patterns = [
                "you are", "your task", "please", "analyze", "extract",
                "summarize", "create", "write", "explain", "review"
            ]
            prompt_lower = prompt.lower()
            if any(p in prompt_lower for p in instruction_patterns):
                has_clear_instructions = True

    # Score based on findings
    if has_clear_instructions:
        score.score += 0.5
        score.notes = "Contains clear instructions"

    if total_prompt_length > 50:
        score.score += 0.3
        score.notes += "; Substantial prompt content"
    elif total_prompt_length > 0:
        score.score += 0.1
        concerns.append("Very short prompt - may lack clarity")

    # Check for output variable naming
    for step in steps:
        output_var = step.get("output_var", "")
        if output_var and output_var != "response":
            score.score += 0.2
            score.notes += "; Descriptive output variable"
            break

    score.score = min(1.0, score.score)
    score.concerns = concerns
    return score


def vet_scope(config: dict) -> VetScore:
    """Check if tool is appropriately scoped (not too broad/narrow)."""
    score = VetScore(criterion="scope", score=0.0, notes="")
    concerns = []

    description = config.get("description", "")
    steps = config.get("steps", [])
    arguments = config.get("arguments", [])

    # Start with base score
    score.score = 0.5

    # Single-step tools are well-scoped
    if len(steps) == 1:
        score.score += 0.2
        score.notes = "Single-step tool - focused scope"
    elif len(steps) <= 3:
        score.score += 0.1
        score.notes = "Multi-step tool with reasonable complexity"
    else:
        concerns.append(f"Complex tool with {len(steps)} steps - may be over-scoped")
        score.score -= 0.1

    # Check for overly generic descriptions
    generic_terms = ["everything", "anything", "all", "any task", "general purpose"]
    desc_lower = description.lower()
    if any(term in desc_lower for term in generic_terms):
        concerns.append("Description suggests overly broad scope")
        score.score -= 0.2

    # Arguments indicate configurable scope (good)
    if arguments:
        score.score += 0.1
        score.notes += "; Configurable via arguments"

    score.score = max(0.0, min(1.0, score.score))
    score.concerns = concerns
    return score


def vet_efficiency(config: dict) -> VetScore:
    """Check if prompt is well-structured and efficient."""
    score = VetScore(criterion="efficiency", score=0.0, notes="")
    concerns = []

    steps = config.get("steps", [])

    # Analyze prompts
    for step in steps:
        if step.get("type") == "prompt":
            prompt = step.get("prompt", "")

            # Check for excessive repetition
            words = prompt.lower().split()
            word_counts = {}
            for word in words:
                if len(word) > 4:  # Only check meaningful words
                    word_counts[word] = word_counts.get(word, 0) + 1
            max_repetition = max(word_counts.values()) if word_counts else 0
            if max_repetition > 5:
                concerns.append(f"Repetitive language detected ({max_repetition}x)")
                score.score = max(0.0, score.score - 0.2)

            # Check for structured output hints
            structure_patterns = [
                "markdown", "json", "format", "structure", "sections",
                "bullet", "numbered", "list", "table"
            ]
            if any(p in prompt.lower() for p in structure_patterns):
                score.score += 0.3
                score.notes = "Specifies output structure"

            # Reasonable length (not too short, not excessive)
            if 100 <= len(prompt) <= 5000:
                score.score += 0.4
            elif len(prompt) < 100:
                concerns.append("Very short prompt - may lack guidance")
                score.score += 0.2
            else:
                concerns.append("Very long prompt - may be inefficient")
                score.score += 0.2

    # Base score if steps exist
    if steps:
        score.score += 0.3

    score.score = min(1.0, score.score)
    score.concerns = concerns
    return score


def vet_safety(config: dict) -> VetScore:
    """Check for concerning patterns in the tool."""
    score = VetScore(criterion="safety", score=1.0, notes="No safety concerns")
    concerns = []

    steps = config.get("steps", [])

    # Check for code steps
    code_step_count = 0
    for step in steps:
        if step.get("type") == "code":
            code_step_count += 1
            code = step.get("code", "")

            # Check for potentially dangerous patterns
            dangerous_patterns = [
                ("subprocess", "Executes shell commands"),
                ("os.system", "Executes shell commands"),
                ("eval(", "Dynamic code execution"),
                ("exec(", "Dynamic code execution"),
                ("open(", "File operations"),
                ("requests.", "Network requests"),
                ("urllib", "Network requests"),
                ("shutil.rmtree", "Recursive deletion"),
            ]
            for pattern, concern in dangerous_patterns:
                if pattern in code:
                    concerns.append(f"Code contains {concern.lower()}")
                    score.score -= 0.15

    if code_step_count > 0:
        score.notes = f"Contains {code_step_count} code step(s)"
        if not concerns:
            score.notes += " - no dangerous patterns detected"

    score.score = max(0.0, score.score)
    score.concerns = concerns
    return score


def vet_tool(config: dict, tool_path: str) -> VetReport:
    """Perform complete vetting of a tool."""
    name = config.get("name", "unknown")

    # Run all checks
    scores = [
        vet_honesty(config),
        vet_transparency(config),
        vet_scope(config),
        vet_efficiency(config),
        vet_safety(config),
    ]

    # Calculate overall score (weighted average)
    weights = {
        "honesty": 0.25,
        "transparency": 0.20,
        "scope": 0.15,
        "efficiency": 0.15,
        "safety": 0.25,
    }
    total_weight = sum(weights.values())
    weighted_sum = sum(s.score * weights.get(s.criterion, 0.1) for s in scores)
    overall_score = weighted_sum / total_weight

    # Determine result
    if overall_score >= APPROVE_THRESHOLD:
        result = VetResult.APPROVE
    elif overall_score < REJECT_THRESHOLD:
        result = VetResult.REJECT
    else:
        result = VetResult.REVIEW

    # Collect all concerns for suggestions
    suggestions = []
    for s in scores:
        for concern in s.concerns:
            suggestions.append(f"[{s.criterion}] {concern}")

    return VetReport(
        tool_name=name,
        tool_path=tool_path,
        result=result,
        overall_score=overall_score,
        scores=scores,
        suggestions=suggestions,
    )
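
# Worked example of the weighting above (hypothetical scores, not from a real
# tool): honesty=0.9, transparency=0.7, scope=0.6, efficiency=0.5, safety=1.0
# gives 0.9*0.25 + 0.7*0.20 + 0.6*0.15 + 0.5*0.15 + 1.0*0.25 = 0.78, which falls
# between REJECT_THRESHOLD (0.3) and APPROVE_THRESHOLD (0.8), so the tool is
# marked VetResult.REVIEW rather than auto-approved or auto-rejected.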


def vet_directory(directory: Path, provider: Optional[str] = None) -> list[VetReport]:
    """Vet all tools in a directory."""
    reports = []
    for entry in directory.iterdir():
        config_file = None
        if entry.is_dir():
            config_file = entry / "config.yaml"
        elif entry.suffix in [".yaml", ".yml"]:
            config_file = entry

        if config_file and config_file.exists():
            config = load_tool_config(config_file)
            if config:
                report = vet_tool(config, str(entry))
                reports.append(report)
    return reports


def print_report(report: VetReport, verbose: bool = False):
    """Print a vetting report to console."""
    # Result emoji
    result_emoji = {
        VetResult.APPROVE: "✅",
        VetResult.REVIEW: "⚠️",
        VetResult.REJECT: "❌",
        VetResult.ERROR: "💥",
    }
    emoji = result_emoji.get(report.result, "")

    print(f"\n{emoji} {report.tool_name}: {report.result.value.upper()} (score: {report.overall_score:.2f})")

    if verbose or report.result != VetResult.APPROVE:
        print(f"  Path: {report.tool_path}")

        # Print individual scores
        for score in report.scores:
            bar = "█" * int(score.score * 10) + "░" * (10 - int(score.score * 10))
            print(f"  {score.criterion:12} [{bar}] {score.score:.2f}")
            if score.concerns:
                for concern in score.concerns:
                    print(f"    - {concern}")

    # Print suggestions
    if report.suggestions and verbose:
        print("  Suggestions:")
        for suggestion in report.suggestions:
            print(f"    - {suggestion}")


def main():
    parser = argparse.ArgumentParser(
        description="Vet CmdForge tools for quality and safety",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument(
        "path",
        type=Path,
        nargs="?",
        help="Tool config file or directory to vet"
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Vet all tools in a directory"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show detailed output"
    )
    parser.add_argument(
        "--provider",
        default=None,
        help="AI provider for enhanced analysis (future feature)"
    )
    args = parser.parse_args()

    if not args.path:
        parser.error("Please specify a path to vet")

    # Collect reports
    reports = []
    if args.all or args.path.is_dir():
        reports = vet_directory(args.path, args.provider)
    else:
        config = load_tool_config(args.path)
        if not config:
            print(f"Error: Could not load tool config from {args.path}", file=sys.stderr)
            return 1
        report = vet_tool(config, str(args.path))
        reports.append(report)

    if not reports:
        print("No tools found to vet", file=sys.stderr)
        return 1

    # Summary counts (also drive the exit code below)
    approved = sum(1 for r in reports if r.result == VetResult.APPROVE)
    review = sum(1 for r in reports if r.result == VetResult.REVIEW)
    rejected = sum(1 for r in reports if r.result == VetResult.REJECT)

    # Output
    if args.json:
        output = [r.to_dict() for r in reports]
        print(json.dumps(output, indent=2))
    else:
        print(f"Vetting {len(reports)} tool(s)...")
        for report in reports:
            print_report(report, args.verbose)

        print(f"\n{'=' * 40}")
        print(f"Summary: {approved} approved, {review} need review, {rejected} rejected")

    # Return code based on results
    if rejected > 0:
        return 2
    elif review > 0:
        return 1
    return 0
if __name__ == "__main__":
sys.exit(main())