From 8471480aa7b61f88e3b6b18e8525ab0fd66e7eb7 Mon Sep 17 00:00:00 2001
From: rob <robdickson444@hotmail.com>
Date: Fri, 16 Jan 2026 16:03:44 -0400
Subject: [PATCH] Add AI-powered secondary review for scrutiny warnings

- Create scrutiny-ai-review tool that uses AI to analyze warnings
- Integrate AI review into publish flow (app.py)
- Integrate AI review into Fabric sync script
- Auto-approve when the AI review returns APPROVE with confidence >= 0.8 (80%)
- Display AI review results in the admin pending-tools modal:
  - verdict (APPROVE/REJECT/NEEDS_HUMAN_REVIEW) with confidence
  - per-finding analysis (FALSE_POSITIVE/LEGITIMATE_CONCERN)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 scripts/fabric_sync.py                        | 74 ++++++++++++++++++-
 src/cmdforge/registry/app.py                  | 73 ++++++++++++++++++
 src/cmdforge/web/templates/admin/pending.html | 55 ++++++++++++++
 3 files changed, 201 insertions(+), 1 deletion(-)

diff --git a/scripts/fabric_sync.py b/scripts/fabric_sync.py
index 769a7f3..6b1e32e 100755
--- a/scripts/fabric_sync.py
+++ b/scripts/fabric_sync.py
@@ -58,6 +58,62 @@ DEFAULT_STATE_FILE = DEFAULT_SYNC_DIR / "sync_state.json"
 DEFAULT_PROVIDER = "opencode-pickle"
 
 
+def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str):
+    """Run AI-powered secondary review of scrutiny warnings.
+
+    Runs ``cmdforge run scrutiny-ai-review`` on the report's warning findings.
+
+    Args:
+        scrutiny_report: The scrutiny report; its "findings" list is filtered
+            down to entries whose "result" is "warning"
+        config: The tool configuration dict ("steps" and "arguments" are sent)
+        tool_name: Name of the tool being reviewed
+        description: Tool description (a falsy value is sent as "")
+
+    Returns:
+        The review as a dict parsed from the tool's stdout, or None when the
+        tool's config.yaml is missing, there are no warnings, the run fails or
+        times out, or the output is not a JSON object
+    """
+    # Skip the review when the tool's config file is missing under ~/.cmdforge
+    tool_path = Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml"
+    if not tool_path.exists():
+        return None
+
+    # Only warning-level findings are sent for review
+    warnings = [f for f in scrutiny_report.get("findings", []) if f.get("result") == "warning"]
+    if not warnings:
+        return None
+
+    # Prepare tool config for review
+    tool_config = {
+        "name": tool_name,
+        "description": description or "",
+        "steps": config.get("steps", []),
+        "arguments": config.get("arguments", []),
+    }
+
+    try:
+        result = subprocess.run(
+            [
+                "cmdforge", "run", "scrutiny-ai-review",
+                "--warnings", json.dumps(warnings),
+                "--tool-config", json.dumps(tool_config),
+            ],
+            capture_output=True,
+            text=True,
+            timeout=60,  # 60 second timeout
+        )
+        output = result.stdout.strip()
+        if result.returncode != 0 or not output:
+            return None
+        review = json.loads(output)
+    except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError):
+        return None
+    # Callers use .get() on the result, so reject valid JSON that is not an object
+    return review if isinstance(review, dict) else None
+
+
 @dataclass
 class PatternState:
     """State of a single pattern."""
@@ -292,8 +348,24 @@ def publish_to_registry(
         except Exception as e:
             logger.warning(f"Scrutiny failed for {name}: {e}")
 
-        # Check scrutiny decision
+        # Ask the AI reviewer for a second opinion when scrutiny's decision is "review"
         scrutiny_decision = scrutiny_report.get("decision", "review")
+        if scrutiny_decision == "review":
+            try:
+                ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
+                if ai_review:
+                    scrutiny_report["ai_review"] = ai_review
+                    # Auto-approve only on an APPROVE verdict with confidence >= 0.8
+                    if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
+                        scrutiny_report["decision"] = "approve"
+                        scrutiny_report["ai_approved"] = True
+                        scrutiny_decision = "approve"
+                        logger.info(f"  AI review approved {name} (confidence: {ai_review.get('confidence', 0):.2f})")
+            except Exception as e:  # best-effort: record the error and keep the original decision
+                scrutiny_report["ai_review_error"] = str(e)
+                logger.warning(f"AI review failed for {name}: {e}")
+
+        # Check scrutiny decision
         if scrutiny_decision == "reject":
             fail_findings = [f for f in scrutiny_report.get("findings", []) if f.get("result") == "fail"]
             fail_msg = fail_findings[0]["message"] if fail_findings else "quality too low"
diff --git a/src/cmdforge/registry/app.py b/src/cmdforge/registry/app.py
index 28bfca5..feaa69a 100644
--- a/src/cmdforge/registry/app.py
+++ b/src/cmdforge/registry/app.py
@@ -194,6 +194,65 @@ def select_version(versions: List[str], constraint_raw: Optional[str]) -> Option
     return max(filtered, key=lambda item: item[0])[1]
 
 
+def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str) -> Optional[dict]:
+    """Run AI-powered secondary review of scrutiny warnings.
+
+    Runs ``cmdforge run scrutiny-ai-review`` on the report's warning findings.
+
+    Args:
+        scrutiny_report: The scrutiny report; its "findings" list is filtered
+            down to entries whose "result" is "warning"
+        config: The tool configuration dict ("steps" and "arguments" are sent)
+        tool_name: Name of the tool being reviewed
+        description: Tool description (a falsy value is sent as "")
+
+    Returns:
+        The review as a dict parsed from the tool's stdout, or None when the
+        tool's config.yaml is missing, there are no warnings, the run fails or
+        times out, or the output is not a JSON object
+    """
+    import subprocess
+    from pathlib import Path
+
+    # Skip the review when the tool's config file is missing under ~/.cmdforge
+    tool_path = Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml"
+    if not tool_path.exists():
+        return None
+
+    # Only warning-level findings are sent for review
+    warnings = [f for f in scrutiny_report.get("findings", []) if f.get("result") == "warning"]
+    if not warnings:
+        return None
+
+    # Prepare tool config for review
+    tool_config = {
+        "name": tool_name,
+        "description": description or "",
+        "steps": config.get("steps", []),
+        "arguments": config.get("arguments", []),
+    }
+
+    try:
+        result = subprocess.run(
+            [
+                "cmdforge", "run", "scrutiny-ai-review",
+                "--warnings", json.dumps(warnings),
+                "--tool-config", json.dumps(tool_config),
+            ],
+            capture_output=True,
+            text=True,
+            timeout=60,  # 60 second timeout
+        )
+        output = result.stdout.strip()
+        if result.returncode != 0 or not output:
+            return None
+        review = json.loads(output)
+    except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError):
+        return None
+    # Callers use .get() on the result, so reject valid JSON that is not an object
+    return review if isinstance(review, dict) else None
+
+
 def create_app() -> Flask:
     app = Flask(__name__)
     app.config["MAX_CONTENT_LENGTH"] = MAX_BODY_BYTES
@@ -1787,6 +1846,20 @@ def create_app() -> Flask:
                     details={"scrutiny": scrutiny_report},
                 )
 
+            # Ask the AI reviewer for a second opinion when scrutiny's decision is "review"
+            if scrutiny_report.get("decision") == "review":
+                try:
+                    ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
+                    if ai_review:
+                        scrutiny_report["ai_review"] = ai_review
+                        # Auto-approve only on an APPROVE verdict with confidence >= 0.8
+                        if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
+                            scrutiny_report["decision"] = "approve"
+                            scrutiny_report["ai_approved"] = True
+                except Exception as e:
+                    # Don't fail publish if AI review fails; keep the error text in the report
+                    scrutiny_report["ai_review_error"] = str(e)
+
         if dry_run:
             return jsonify({
                 "data": {
diff --git a/src/cmdforge/web/templates/admin/pending.html b/src/cmdforge/web/templates/admin/pending.html
index 72ccd68..e65c7dd 100644
--- a/src/cmdforge/web/templates/admin/pending.html
+++ b/src/cmdforge/web/templates/admin/pending.html
@@ -153,6 +153,12 @@
         <div class="p-6 overflow-y-auto flex-grow">
             <div id="detail-loading" class="py-8 text-center text-gray-500">Loading...</div>
             <div id="detail-content" class="hidden">
+                <!-- AI Review (if available) -->
+                <div id="detail-ai-review-section" class="mb-6 hidden">
+                    <h4 class="text-sm font-medium text-gray-700 mb-2">AI Review</h4>
+                    <div id="detail-ai-review" class="rounded-md p-4"></div>
+                </div>
+
                 <!-- Scrutiny Warnings -->
                 <div id="detail-warnings-section" class="mb-6 hidden">
                     <h4 class="text-sm font-medium text-gray-700 mb-2">Scrutiny Warnings</h4>
@@ -239,6 +245,55 @@ async function viewTool(toolId) {
         // Update title
         document.getElementById('detail-title').textContent = `${tool.owner}/${tool.name} v${tool.version}`;
 
+        // AI Review (if available); model-generated text is escaped before it reaches innerHTML
+        const aiReviewSection = document.getElementById('detail-ai-review-section');
+        const aiReviewDiv = document.getElementById('detail-ai-review');
+        const aiReview = tool.scrutiny_report?.ai_review;
+        if (aiReview) {
+            aiReviewSection.classList.remove('hidden');
+            const verdict = aiReview.overall_verdict || 'UNKNOWN';
+            const confidence = Number.isFinite(aiReview.confidence * 100) ? (aiReview.confidence * 100).toFixed(0) : '?';  // toFixed() never returns '', so test the number
+            let verdictColor, verdictBg;
+            if (verdict === 'APPROVE') {
+                verdictColor = 'text-green-800';
+                verdictBg = 'bg-green-50 border-green-200';
+            } else if (verdict === 'REJECT') {
+                verdictColor = 'text-red-800';
+                verdictBg = 'bg-red-50 border-red-200';
+            } else {
+                verdictColor = 'text-yellow-800';
+                verdictBg = 'bg-yellow-50 border-yellow-200';
+            }
+
+            let findingsHtml = '';
+            if (aiReview.findings && aiReview.findings.length > 0) {
+                findingsHtml = '<div class="mt-3 space-y-2">' + aiReview.findings.map(f => {
+                    let fColor = f.verdict === 'FALSE_POSITIVE' ? 'text-green-700' :
+                                 f.verdict === 'LEGITIMATE_CONCERN' ? 'text-red-700' : 'text-yellow-700';
+                    return `<div class="text-sm border-l-2 pl-3 ${f.verdict === 'FALSE_POSITIVE' ? 'border-green-400' : f.verdict === 'LEGITIMATE_CONCERN' ? 'border-red-400' : 'border-yellow-400'}">
+                        <div class="font-medium ${fColor}">${escapeHtml(f.verdict || 'UNKNOWN')}</div>
+                        <div class="text-gray-600">${escapeHtml(f.explanation || '')}</div>
+                    </div>`;
+                }).join('') + '</div>';
+            }
+
+            aiReviewDiv.className = `rounded-md p-4 border ${verdictBg}`;
+            aiReviewDiv.innerHTML = `
+                <div class="flex items-center justify-between mb-2">
+                    <span class="font-medium ${verdictColor}">${escapeHtml(verdict)}</span>
+                    <span class="text-sm text-gray-500">${confidence}% confidence</span>
+                </div>
+                <div class="text-sm text-gray-700">${escapeHtml(aiReview.summary || '')}</div>
+                ${findingsHtml}
+            `;
+        } else if (tool.scrutiny_report?.ai_review_error) {
+            aiReviewSection.classList.remove('hidden');
+            aiReviewDiv.className = 'rounded-md p-4 border bg-gray-50 border-gray-200';
+            aiReviewDiv.innerHTML = `<div class="text-sm text-gray-500">AI review failed: ${escapeHtml(tool.scrutiny_report.ai_review_error)}</div>`;
+        } else {
+            aiReviewSection.classList.add('hidden');
+        }
+
         // Scrutiny Warnings
         const warningsSection = document.getElementById('detail-warnings-section');
         const warningsDiv = document.getElementById('detail-warnings');