Add AI-powered secondary review for scrutiny warnings

- Create scrutiny-ai-review tool that uses AI to analyze warnings
- Integrate AI review into publish flow (app.py)
- Integrate AI review into Fabric sync script
- If AI review returns APPROVE with >=80% confidence, auto-approve
- Display AI review results in admin pending tools modal
- Shows verdict (APPROVE/REJECT/NEEDS_HUMAN_REVIEW) with confidence
- Shows per-finding analysis (FALSE_POSITIVE/LEGITIMATE_CONCERN)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
rob 2026-01-16 16:03:44 -04:00
parent b0bd692be5
commit 8471480aa7
3 changed files with 201 additions and 1 deletions

View File

@ -58,6 +58,62 @@ DEFAULT_STATE_FILE = DEFAULT_SYNC_DIR / "sync_state.json"
DEFAULT_PROVIDER = "opencode-pickle"
def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str):
    """Run AI-powered secondary review of scrutiny warnings.

    Uses the scrutiny-ai-review tool to analyze warnings and identify
    false positives. Any failure (tool missing, timeout, bad output)
    degrades gracefully to ``None`` rather than raising.

    Args:
        scrutiny_report: The scrutiny report with findings
        config: The tool configuration dict
        tool_name: Name of the tool being reviewed
        description: Tool description

    Returns:
        AI review result dict, or None if review fails
    """
    # Bail out early if the AI review tool is not installed locally.
    tool_path = Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml"
    if not tool_path.exists():
        return None

    # Only findings flagged as warnings need a secondary opinion.
    warnings = [
        f for f in scrutiny_report.get("findings", [])
        if f.get("result") == "warning"
    ]
    if not warnings:
        return None

    # Minimal view of the tool config that the reviewer needs.
    tool_config = {
        "name": tool_name,
        "description": description or "",
        "steps": config.get("steps", []),
        "arguments": config.get("arguments", []),
    }

    # Invoke the review tool; any failure mode degrades to "no review".
    try:
        result = subprocess.run(
            [
                "cmdforge", "run", "scrutiny-ai-review",
                "--warnings", json.dumps(warnings),
                "--tool-config", json.dumps(tool_config),
            ],
            capture_output=True,
            text=True,
            timeout=60,  # don't let a hung review stall the sync
        )
        if result.returncode == 0 and result.stdout.strip():
            parsed = json.loads(result.stdout.strip())
            # Robustness: callers treat the result as a dict (.get calls);
            # reject any other valid-JSON payload instead of crashing later.
            if isinstance(parsed, dict):
                return parsed
        return None
    except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError):
        # OSError covers FileNotFoundError (cmdforge missing) plus other
        # launch failures such as PermissionError.
        return None
@dataclass
class PatternState:
"""State of a single pattern."""
@ -292,8 +348,24 @@ def publish_to_registry(
except Exception as e:
logger.warning(f"Scrutiny failed for {name}: {e}")
# Check scrutiny decision
# Run AI secondary review if there are warnings
scrutiny_decision = scrutiny_report.get("decision", "review")
if scrutiny_decision == "review":
try:
ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
if ai_review:
scrutiny_report["ai_review"] = ai_review
# Update decision based on AI review
if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
scrutiny_report["decision"] = "approve"
scrutiny_report["ai_approved"] = True
scrutiny_decision = "approve"
logger.info(f" AI review approved {name} (confidence: {ai_review.get('confidence', 0):.2f})")
except Exception as e:
scrutiny_report["ai_review_error"] = str(e)
logger.warning(f"AI review failed for {name}: {e}")
# Check scrutiny decision
if scrutiny_decision == "reject":
fail_findings = [f for f in scrutiny_report.get("findings", []) if f.get("result") == "fail"]
fail_msg = fail_findings[0]["message"] if fail_findings else "quality too low"

View File

@ -194,6 +194,65 @@ def select_version(versions: List[str], constraint_raw: Optional[str]) -> Option
return max(filtered, key=lambda item: item[0])[1]
def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str) -> Optional[dict]:
    """Ask the scrutiny-ai-review tool for a second opinion on warnings.

    Identifies likely false positives among warning-level findings by
    shelling out to the locally installed scrutiny-ai-review tool.

    Args:
        scrutiny_report: The scrutiny report with findings
        config: The tool configuration dict
        tool_name: Name of the tool being reviewed
        description: Tool description

    Returns:
        Parsed AI review dict, or None when the tool is absent, there are
        no warnings, or the review fails for any reason.
    """
    import subprocess
    from pathlib import Path

    # The reviewer must be installed under the user's cmdforge directory.
    if not (Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml").exists():
        return None

    # Nothing to review unless at least one finding is a warning.
    warning_findings = [
        item for item in scrutiny_report.get("findings", [])
        if item.get("result") == "warning"
    ]
    if not warning_findings:
        return None

    # Trimmed-down config payload handed to the reviewer.
    review_input = {
        "name": tool_name,
        "description": description or "",
        "steps": config.get("steps", []),
        "arguments": config.get("arguments", []),
    }

    command = [
        "cmdforge", "run", "scrutiny-ai-review",
        "--warnings", json.dumps(warning_findings),
        "--tool-config", json.dumps(review_input),
    ]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=60,  # 60 second timeout
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None

    output = proc.stdout.strip()
    if proc.returncode != 0 or not output:
        return None
    try:
        return json.loads(output)
    except json.JSONDecodeError:
        return None
def create_app() -> Flask:
app = Flask(__name__)
app.config["MAX_CONTENT_LENGTH"] = MAX_BODY_BYTES
@ -1787,6 +1846,20 @@ def create_app() -> Flask:
details={"scrutiny": scrutiny_report},
)
# Run AI secondary review if there are warnings
if scrutiny_report.get("decision") == "review":
try:
ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
if ai_review:
scrutiny_report["ai_review"] = ai_review
# Update decision based on AI review
if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
scrutiny_report["decision"] = "approve"
scrutiny_report["ai_approved"] = True
except Exception as e:
# Don't fail publish if AI review fails
scrutiny_report["ai_review_error"] = str(e)
if dry_run:
return jsonify({
"data": {

View File

@ -153,6 +153,12 @@
<div class="p-6 overflow-y-auto flex-grow">
<div id="detail-loading" class="py-8 text-center text-gray-500">Loading...</div>
<div id="detail-content" class="hidden">
<!-- AI Review (if available) -->
<div id="detail-ai-review-section" class="mb-6 hidden">
<h4 class="text-sm font-medium text-gray-700 mb-2">AI Review</h4>
<div id="detail-ai-review" class="rounded-md p-4"></div>
</div>
<!-- Scrutiny Warnings -->
<div id="detail-warnings-section" class="mb-6 hidden">
<h4 class="text-sm font-medium text-gray-700 mb-2">Scrutiny Warnings</h4>
@ -239,6 +245,55 @@ async function viewTool(toolId) {
// Update title
document.getElementById('detail-title').textContent = `${tool.owner}/${tool.name} v${tool.version}`;
// AI Review (if available)
const aiReviewSection = document.getElementById('detail-ai-review-section');
const aiReviewDiv = document.getElementById('detail-ai-review');
const aiReview = tool.scrutiny_report?.ai_review;
if (aiReview) {
aiReviewSection.classList.remove('hidden');
const verdict = aiReview.overall_verdict || 'UNKNOWN';
const confidence = (aiReview.confidence * 100).toFixed(0) || '?';
let verdictColor, verdictBg;
if (verdict === 'APPROVE') {
verdictColor = 'text-green-800';
verdictBg = 'bg-green-50 border-green-200';
} else if (verdict === 'REJECT') {
verdictColor = 'text-red-800';
verdictBg = 'bg-red-50 border-red-200';
} else {
verdictColor = 'text-yellow-800';
verdictBg = 'bg-yellow-50 border-yellow-200';
}
let findingsHtml = '';
if (aiReview.findings && aiReview.findings.length > 0) {
findingsHtml = '<div class="mt-3 space-y-2">' + aiReview.findings.map(f => {
let fColor = f.verdict === 'FALSE_POSITIVE' ? 'text-green-700' :
f.verdict === 'LEGITIMATE_CONCERN' ? 'text-red-700' : 'text-yellow-700';
return `<div class="text-sm border-l-2 pl-3 ${f.verdict === 'FALSE_POSITIVE' ? 'border-green-400' : f.verdict === 'LEGITIMATE_CONCERN' ? 'border-red-400' : 'border-yellow-400'}">
<div class="font-medium ${fColor}">${escapeHtml(f.verdict || 'UNKNOWN')}</div>
<div class="text-gray-600">${escapeHtml(f.explanation || '')}</div>
</div>`;
}).join('') + '</div>';
}
aiReviewDiv.className = `rounded-md p-4 border ${verdictBg}`;
aiReviewDiv.innerHTML = `
<div class="flex items-center justify-between mb-2">
<span class="font-medium ${verdictColor}">${verdict}</span>
<span class="text-sm text-gray-500">${confidence}% confidence</span>
</div>
<div class="text-sm text-gray-700">${escapeHtml(aiReview.summary || '')}</div>
${findingsHtml}
`;
} else if (tool.scrutiny_report?.ai_review_error) {
aiReviewSection.classList.remove('hidden');
aiReviewDiv.className = 'rounded-md p-4 border bg-gray-50 border-gray-200';
aiReviewDiv.innerHTML = `<div class="text-sm text-gray-500">AI review failed: ${escapeHtml(tool.scrutiny_report.ai_review_error)}</div>`;
} else {
aiReviewSection.classList.add('hidden');
}
// Scrutiny Warnings
const warningsSection = document.getElementById('detail-warnings-section');
const warningsDiv = document.getElementById('detail-warnings');