CmdForge/scripts/scrutiny.py

#!/usr/bin/env python3
"""
Tool vetting/scrutiny module for CmdForge.
Performs AI-powered analysis of tools to assess quality and safety:
- Honesty: Does the tool do what it claims?
- Transparency: Is the prompt clear and understandable?
- Scope: Is the tool appropriately scoped?
- Efficiency: Is the prompt well-structured?
- Safety: Are there any concerning patterns?
Usage:
# Vet a single tool
python scripts/scrutiny.py path/to/tool/config.yaml
# Vet all tools in directory
python scripts/scrutiny.py --all ~/.cmdforge/
# Output as JSON
python scripts/scrutiny.py --json path/to/tool/config.yaml
# Use specific provider for analysis
python scripts/scrutiny.py --provider claude path/to/tool/config.yaml
"""
import argparse
import json
import sys
from dataclasses import dataclass, field, asdict
from enum import Enum
from pathlib import Path
from typing import Optional
import yaml


class VetResult(Enum):
    """Vetting decision."""
    APPROVE = "approve"  # Auto-approve - meets all criteria
    REVIEW = "review"    # Needs human review - some concerns
    REJECT = "reject"    # Auto-reject - fails criteria
    ERROR = "error"      # Could not vet


@dataclass
class VetScore:
    """Individual score for a vetting criterion."""
    criterion: str
    score: float  # 0.0 to 1.0
    max_score: float = 1.0
    notes: str = ""
    concerns: list[str] = field(default_factory=list)


@dataclass
class VetReport:
    """Complete vetting report for a tool."""
    tool_name: str
    tool_path: str
    result: VetResult
    overall_score: float  # 0.0 to 1.0
    scores: list[VetScore] = field(default_factory=list)
    suggestions: list[str] = field(default_factory=list)
    error: Optional[str] = None

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        d = asdict(self)
        d['result'] = self.result.value
        return d


# Thresholds for auto-approve/reject
APPROVE_THRESHOLD = 0.8  # Score >= 0.8 -> auto-approve
REJECT_THRESHOLD = 0.3   # Score < 0.3 -> auto-reject


def load_tool_config(path: Path) -> Optional[dict]:
    """Load tool configuration from YAML file."""
    if path.is_dir():
        config_file = path / "config.yaml"
    else:
        config_file = path

    if not config_file.exists():
        return None

    with open(config_file) as f:
        return yaml.safe_load(f)


def vet_honesty(config: dict) -> VetScore:
    """Check if tool description matches what it actually does."""
    score = VetScore(criterion="honesty", score=0.0, notes="")
    concerns = []

    name = config.get("name", "")
    description = config.get("description", "")
    steps = config.get("steps", [])

    # Check that description exists
    if not description:
        concerns.append("Missing description")
        score.score = 0.3
    else:
        score.score = 0.6

    # Check that steps exist
    if not steps:
        concerns.append("No execution steps defined")
        score.score = min(score.score, 0.2)
    else:
        # Check if description keywords appear in prompts
        desc_words = set(description.lower().split())
        prompt_text = ""
        for step in steps:
            if step.get("type") == "prompt":
                prompt_text += step.get("prompt", "").lower() + " "

        # Simple keyword overlap check
        prompt_words = set(prompt_text.split())
        overlap = desc_words & prompt_words
        meaningful_overlap = overlap - {"the", "a", "an", "and", "or", "is", "to", "for", "of", "in"}
        if len(meaningful_overlap) >= 2:
            score.score = min(1.0, score.score + 0.3)
            score.notes = f"Description matches prompt content ({len(meaningful_overlap)} keywords)"
        else:
            concerns.append("Description may not match actual behavior")

    score.concerns = concerns
    return score


def vet_transparency(config: dict) -> VetScore:
    """Check if the tool's behavior is clear and understandable."""
    score = VetScore(criterion="transparency", score=0.0, notes="")
    concerns = []

    steps = config.get("steps", [])
    if not steps:
        concerns.append("No steps to analyze")
        score.concerns = concerns
        return score

    # Analyze each step
    total_prompt_length = 0
    has_clear_instructions = False

    for step in steps:
        if step.get("type") == "prompt":
            prompt = step.get("prompt", "")
            total_prompt_length += len(prompt)

            # Check for clear instruction patterns
            instruction_patterns = [
                "you are", "your task", "please", "analyze", "extract",
                "summarize", "create", "write", "explain", "review"
            ]
            prompt_lower = prompt.lower()
            if any(p in prompt_lower for p in instruction_patterns):
                has_clear_instructions = True

    # Score based on findings
    if has_clear_instructions:
        score.score += 0.5
        score.notes = "Contains clear instructions"

    if total_prompt_length > 50:
        score.score += 0.3
        score.notes += "; Substantial prompt content"
    elif total_prompt_length > 0:
        score.score += 0.1
        concerns.append("Very short prompt - may lack clarity")

    # Check for output variable naming
    for step in steps:
        output_var = step.get("output_var", "")
        if output_var and output_var != "response":
            score.score += 0.2
            score.notes += "; Descriptive output variable"
            break

    score.score = min(1.0, score.score)
    score.concerns = concerns
    return score


def vet_scope(config: dict) -> VetScore:
    """Check if tool is appropriately scoped (not too broad/narrow)."""
    score = VetScore(criterion="scope", score=0.0, notes="")
    concerns = []

    description = config.get("description", "")
    steps = config.get("steps", [])
    arguments = config.get("arguments", [])

    # Start with base score
    score.score = 0.5

    # Single-step tools are well-scoped
    if len(steps) == 1:
        score.score += 0.2
        score.notes = "Single-step tool - focused scope"
    elif len(steps) <= 3:
        score.score += 0.1
        score.notes = "Multi-step tool with reasonable complexity"
    else:
        concerns.append(f"Complex tool with {len(steps)} steps - may be over-scoped")
        score.score -= 0.1

    # Check for overly generic descriptions
    generic_terms = ["everything", "anything", "all", "any task", "general purpose"]
    desc_lower = description.lower()
    if any(term in desc_lower for term in generic_terms):
        concerns.append("Description suggests overly broad scope")
        score.score -= 0.2

    # Arguments indicate configurable scope (good)
    if arguments:
        score.score += 0.1
        score.notes += "; Configurable via arguments"

    score.score = max(0.0, min(1.0, score.score))
    score.concerns = concerns
    return score


def vet_efficiency(config: dict) -> VetScore:
    """Check if prompt is well-structured and efficient."""
    score = VetScore(criterion="efficiency", score=0.0, notes="")
    concerns = []

    steps = config.get("steps", [])

    # Analyze prompts
    for step in steps:
        if step.get("type") == "prompt":
            prompt = step.get("prompt", "")

            # Check for excessive repetition
            words = prompt.lower().split()
            word_counts = {}
            for word in words:
                if len(word) > 4:  # Only check meaningful words
                    word_counts[word] = word_counts.get(word, 0) + 1
            max_repetition = max(word_counts.values()) if word_counts else 0
            if max_repetition > 5:
                concerns.append(f"Repetitive language detected ({max_repetition}x)")
                score.score = max(0.0, score.score - 0.2)

            # Check for structured output hints
            structure_patterns = [
                "markdown", "json", "format", "structure", "sections",
                "bullet", "numbered", "list", "table"
            ]
            if any(p in prompt.lower() for p in structure_patterns):
                score.score += 0.3
                score.notes = "Specifies output structure"

            # Reasonable length (not too short, not excessive)
            if 100 <= len(prompt) <= 5000:
                score.score += 0.4
            elif len(prompt) < 100:
                concerns.append("Very short prompt - may lack guidance")
                score.score += 0.2
            else:
                concerns.append("Very long prompt - may be inefficient")
                score.score += 0.2

    # Base score if steps exist
    if steps:
        score.score += 0.3

    score.score = min(1.0, score.score)
    score.concerns = concerns
    return score


def vet_safety(config: dict) -> VetScore:
    """Check for concerning patterns in the tool."""
    score = VetScore(criterion="safety", score=1.0, notes="No safety concerns")
    concerns = []

    steps = config.get("steps", [])

    # Check for code steps
    code_step_count = 0
    for step in steps:
        if step.get("type") == "code":
            code_step_count += 1
            code = step.get("code", "")

            # Check for potentially dangerous patterns
            dangerous_patterns = [
                ("subprocess", "Executes shell commands"),
                ("os.system", "Executes shell commands"),
                ("eval(", "Dynamic code execution"),
                ("exec(", "Dynamic code execution"),
                ("open(", "File operations"),
                ("requests.", "Network requests"),
                ("urllib", "Network requests"),
                ("shutil.rmtree", "Recursive deletion"),
            ]
            for pattern, concern in dangerous_patterns:
                if pattern in code:
                    concerns.append(f"Code contains {concern.lower()}")
                    score.score -= 0.15

    if code_step_count > 0:
        score.notes = f"Contains {code_step_count} code step(s)"
        if not concerns:
            score.notes += " - no dangerous patterns detected"

    score.score = max(0.0, score.score)
    score.concerns = concerns
    return score


def vet_tool(config: dict, tool_path: str) -> VetReport:
    """Perform complete vetting of a tool."""
    name = config.get("name", "unknown")

    # Run all checks
    scores = [
        vet_honesty(config),
        vet_transparency(config),
        vet_scope(config),
        vet_efficiency(config),
        vet_safety(config),
    ]

    # Calculate overall score (weighted average)
    weights = {
        "honesty": 0.25,
        "transparency": 0.20,
        "scope": 0.15,
        "efficiency": 0.15,
        "safety": 0.25,
    }
    total_weight = sum(weights.values())
    weighted_sum = sum(s.score * weights.get(s.criterion, 0.1) for s in scores)
    overall_score = weighted_sum / total_weight

    # Determine result
    if overall_score >= APPROVE_THRESHOLD:
        result = VetResult.APPROVE
    elif overall_score < REJECT_THRESHOLD:
        result = VetResult.REJECT
    else:
        result = VetResult.REVIEW

    # Collect all concerns for suggestions
    suggestions = []
    for s in scores:
        for concern in s.concerns:
            suggestions.append(f"[{s.criterion}] {concern}")

    return VetReport(
        tool_name=name,
        tool_path=tool_path,
        result=result,
        overall_score=overall_score,
        scores=scores,
        suggestions=suggestions,
    )
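
# Worked example of the weighting above (hypothetical scores, not from a real
# tool): honesty=0.9, transparency=0.7, scope=0.6, efficiency=0.5, safety=1.0
# gives 0.9*0.25 + 0.7*0.20 + 0.6*0.15 + 0.5*0.15 + 1.0*0.25 = 0.78, which falls
# between REJECT_THRESHOLD (0.3) and APPROVE_THRESHOLD (0.8), so the tool is
# marked VetResult.REVIEW rather than auto-approved or auto-rejected.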


def vet_directory(directory: Path, provider: Optional[str] = None) -> list[VetReport]:
    """Vet all tools in a directory."""
    reports = []
    for entry in directory.iterdir():
        config_file = None
        if entry.is_dir():
            config_file = entry / "config.yaml"
        elif entry.suffix in [".yaml", ".yml"]:
            config_file = entry

        if config_file and config_file.exists():
            config = load_tool_config(config_file)
            if config:
                report = vet_tool(config, str(entry))
                reports.append(report)
    return reports


def print_report(report: VetReport, verbose: bool = False):
    """Print a vetting report to console."""
    # Result emoji
    result_emoji = {
        VetResult.APPROVE: "✅",
        VetResult.REVIEW: "⚠️",
        VetResult.REJECT: "❌",
        VetResult.ERROR: "💥",
    }
    emoji = result_emoji.get(report.result, "")

    print(f"\n{emoji} {report.tool_name}: {report.result.value.upper()} (score: {report.overall_score:.2f})")

    if verbose or report.result != VetResult.APPROVE:
        print(f"  Path: {report.tool_path}")

        # Print individual scores
        for score in report.scores:
            bar = "█" * int(score.score * 10) + "░" * (10 - int(score.score * 10))
            print(f"  {score.criterion:12} [{bar}] {score.score:.2f}")
            if score.concerns:
                for concern in score.concerns:
                    print(f"    - {concern}")

    # Print suggestions
    if report.suggestions and verbose:
        print("  Suggestions:")
        for suggestion in report.suggestions:
            print(f"    - {suggestion}")


def main():
    parser = argparse.ArgumentParser(
        description="Vet CmdForge tools for quality and safety",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument(
        "path",
        type=Path,
        nargs="?",
        help="Tool config file or directory to vet"
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Vet all tools in a directory"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show detailed output"
    )
    parser.add_argument(
        "--provider",
        default=None,
        help="AI provider for enhanced analysis (future feature)"
    )
    args = parser.parse_args()

    if not args.path:
        parser.error("Please specify a path to vet")

    # Collect reports
    reports = []
    if args.all or args.path.is_dir():
        reports = vet_directory(args.path, args.provider)
    else:
        config = load_tool_config(args.path)
        if not config:
            print(f"Error: Could not load tool config from {args.path}", file=sys.stderr)
            return 1
        report = vet_tool(config, str(args.path))
        reports.append(report)

    if not reports:
        print("No tools found to vet", file=sys.stderr)
        return 1

    # Summary counts (also drive the exit code below)
    approved = sum(1 for r in reports if r.result == VetResult.APPROVE)
    review = sum(1 for r in reports if r.result == VetResult.REVIEW)
    rejected = sum(1 for r in reports if r.result == VetResult.REJECT)

    # Output
    if args.json:
        output = [r.to_dict() for r in reports]
        print(json.dumps(output, indent=2))
    else:
        print(f"Vetting {len(reports)} tool(s)...")
        for report in reports:
            print_report(report, args.verbose)

        print(f"\n{'=' * 40}")
        print(f"Summary: {approved} approved, {review} need review, {rejected} rejected")

    # Return code based on results
    if rejected > 0:
        return 2
    elif review > 0:
        return 1
    return 0
if __name__ == "__main__":
sys.exit(main())