Add AI-powered secondary review for scrutiny warnings

- Create scrutiny-ai-review tool that uses AI to analyze warnings
- Integrate AI review into publish flow (app.py)
- Integrate AI review into Fabric sync script
- If AI review returns APPROVE with >=80% confidence, auto-approve
- Display AI review results in admin pending tools modal
- Shows verdict (APPROVE/REJECT/NEEDS_HUMAN_REVIEW) with confidence
- Shows per-finding analysis (FALSE_POSITIVE/LEGITIMATE_CONCERN)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
rob 2026-01-16 16:03:44 -04:00
parent b0bd692be5
commit 8471480aa7
3 changed files with 201 additions and 1 deletions

View File

@ -58,6 +58,62 @@ DEFAULT_STATE_FILE = DEFAULT_SYNC_DIR / "sync_state.json"
DEFAULT_PROVIDER = "opencode-pickle"
def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str):
    """Run AI-powered secondary review of scrutiny warnings.

    Uses the scrutiny-ai-review tool to analyze warnings and identify
    false positives. Any failure (tool missing, timeout, bad output)
    degrades gracefully to ``None`` rather than raising.

    Args:
        scrutiny_report: The scrutiny report with findings
        config: The tool configuration dict
        tool_name: Name of the tool being reviewed
        description: Tool description

    Returns:
        AI review result dict, or None if review fails
    """
    # Bail out early if the AI review tool is not installed locally.
    tool_path = Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml"
    if not tool_path.exists():
        return None

    # Only findings flagged as warnings need a secondary opinion.
    warnings = [
        f for f in scrutiny_report.get("findings", [])
        if f.get("result") == "warning"
    ]
    if not warnings:
        return None

    # Minimal view of the tool config that the reviewer needs.
    tool_config = {
        "name": tool_name,
        "description": description or "",
        "steps": config.get("steps", []),
        "arguments": config.get("arguments", []),
    }

    # Invoke the review tool; any failure mode degrades to "no review".
    try:
        result = subprocess.run(
            [
                "cmdforge", "run", "scrutiny-ai-review",
                "--warnings", json.dumps(warnings),
                "--tool-config", json.dumps(tool_config),
            ],
            capture_output=True,
            text=True,
            timeout=60,  # don't let a hung review stall the sync
        )
        if result.returncode == 0 and result.stdout.strip():
            parsed = json.loads(result.stdout.strip())
            # Robustness: callers treat the result as a dict (.get calls);
            # reject any other valid-JSON payload instead of crashing later.
            if isinstance(parsed, dict):
                return parsed
        return None
    except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError):
        # OSError covers FileNotFoundError (cmdforge missing) plus other
        # launch failures such as PermissionError.
        return None
@dataclass
class PatternState:
"""State of a single pattern."""
@ -292,8 +348,24 @@ def publish_to_registry(
except Exception as e:
logger.warning(f"Scrutiny failed for {name}: {e}")
# Check scrutiny decision
# Run AI secondary review if there are warnings
scrutiny_decision = scrutiny_report.get("decision", "review")
if scrutiny_decision == "review":
try:
ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
if ai_review:
scrutiny_report["ai_review"] = ai_review
# Update decision based on AI review
if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
scrutiny_report["decision"] = "approve"
scrutiny_report["ai_approved"] = True
scrutiny_decision = "approve"
logger.info(f" AI review approved {name} (confidence: {ai_review.get('confidence', 0):.2f})")
except Exception as e:
scrutiny_report["ai_review_error"] = str(e)
logger.warning(f"AI review failed for {name}: {e}")
# Check scrutiny decision
if scrutiny_decision == "reject":
fail_findings = [f for f in scrutiny_report.get("findings", []) if f.get("result") == "fail"]
fail_msg = fail_findings[0]["message"] if fail_findings else "quality too low"

View File

@ -194,6 +194,65 @@ def select_version(versions: List[str], constraint_raw: Optional[str]) -> Option
return max(filtered, key=lambda item: item[0])[1]
def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str) -> Optional[dict]:
    """Ask the scrutiny-ai-review tool for a second opinion on warnings.

    Identifies likely false positives among warning-level findings by
    shelling out to the locally installed scrutiny-ai-review tool.

    Args:
        scrutiny_report: The scrutiny report with findings
        config: The tool configuration dict
        tool_name: Name of the tool being reviewed
        description: Tool description

    Returns:
        Parsed AI review dict, or None when the tool is absent, there are
        no warnings, or the review fails for any reason.
    """
    import subprocess
    from pathlib import Path

    # The reviewer must be installed under the user's cmdforge directory.
    if not (Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml").exists():
        return None

    # Nothing to review unless at least one finding is a warning.
    warning_findings = [
        item for item in scrutiny_report.get("findings", [])
        if item.get("result") == "warning"
    ]
    if not warning_findings:
        return None

    # Trimmed-down config payload handed to the reviewer.
    review_input = {
        "name": tool_name,
        "description": description or "",
        "steps": config.get("steps", []),
        "arguments": config.get("arguments", []),
    }

    command = [
        "cmdforge", "run", "scrutiny-ai-review",
        "--warnings", json.dumps(warning_findings),
        "--tool-config", json.dumps(review_input),
    ]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=60,  # 60 second timeout
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None

    output = proc.stdout.strip()
    if proc.returncode != 0 or not output:
        return None
    try:
        return json.loads(output)
    except json.JSONDecodeError:
        return None
def create_app() -> Flask:
app = Flask(__name__)
app.config["MAX_CONTENT_LENGTH"] = MAX_BODY_BYTES
@ -1787,6 +1846,20 @@ def create_app() -> Flask:
details={"scrutiny": scrutiny_report},
)
# Run AI secondary review if there are warnings
if scrutiny_report.get("decision") == "review":
try:
ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
if ai_review:
scrutiny_report["ai_review"] = ai_review
# Update decision based on AI review
if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
scrutiny_report["decision"] = "approve"
scrutiny_report["ai_approved"] = True
except Exception as e:
# Don't fail publish if AI review fails
scrutiny_report["ai_review_error"] = str(e)
if dry_run:
return jsonify({
"data": {

View File

@ -153,6 +153,12 @@
<div class="p-6 overflow-y-auto flex-grow">
<div id="detail-loading" class="py-8 text-center text-gray-500">Loading...</div>
<div id="detail-content" class="hidden">
<!-- AI Review (if available) -->
<div id="detail-ai-review-section" class="mb-6 hidden">
<h4 class="text-sm font-medium text-gray-700 mb-2">AI Review</h4>
<div id="detail-ai-review" class="rounded-md p-4"></div>
</div>
<!-- Scrutiny Warnings -->
<div id="detail-warnings-section" class="mb-6 hidden">
<h4 class="text-sm font-medium text-gray-700 mb-2">Scrutiny Warnings</h4>
@ -239,6 +245,55 @@ async function viewTool(toolId) {
// Update title
document.getElementById('detail-title').textContent = `${tool.owner}/${tool.name} v${tool.version}`;
// AI Review (if available)
const aiReviewSection = document.getElementById('detail-ai-review-section');
const aiReviewDiv = document.getElementById('detail-ai-review');
const aiReview = tool.scrutiny_report?.ai_review;
if (aiReview) {
aiReviewSection.classList.remove('hidden');
const verdict = aiReview.overall_verdict || 'UNKNOWN';
const confidence = (aiReview.confidence * 100).toFixed(0) || '?';
let verdictColor, verdictBg;
if (verdict === 'APPROVE') {
verdictColor = 'text-green-800';
verdictBg = 'bg-green-50 border-green-200';
} else if (verdict === 'REJECT') {
verdictColor = 'text-red-800';
verdictBg = 'bg-red-50 border-red-200';
} else {
verdictColor = 'text-yellow-800';
verdictBg = 'bg-yellow-50 border-yellow-200';
}
let findingsHtml = '';
if (aiReview.findings && aiReview.findings.length > 0) {
findingsHtml = '<div class="mt-3 space-y-2">' + aiReview.findings.map(f => {
let fColor = f.verdict === 'FALSE_POSITIVE' ? 'text-green-700' :
f.verdict === 'LEGITIMATE_CONCERN' ? 'text-red-700' : 'text-yellow-700';
return `<div class="text-sm border-l-2 pl-3 ${f.verdict === 'FALSE_POSITIVE' ? 'border-green-400' : f.verdict === 'LEGITIMATE_CONCERN' ? 'border-red-400' : 'border-yellow-400'}">
<div class="font-medium ${fColor}">${escapeHtml(f.verdict || 'UNKNOWN')}</div>
<div class="text-gray-600">${escapeHtml(f.explanation || '')}</div>
</div>`;
}).join('') + '</div>';
}
aiReviewDiv.className = `rounded-md p-4 border ${verdictBg}`;
aiReviewDiv.innerHTML = `
<div class="flex items-center justify-between mb-2">
<span class="font-medium ${verdictColor}">${verdict}</span>
<span class="text-sm text-gray-500">${confidence}% confidence</span>
</div>
<div class="text-sm text-gray-700">${escapeHtml(aiReview.summary || '')}</div>
${findingsHtml}
`;
} else if (tool.scrutiny_report?.ai_review_error) {
aiReviewSection.classList.remove('hidden');
aiReviewDiv.className = 'rounded-md p-4 border bg-gray-50 border-gray-200';
aiReviewDiv.innerHTML = `<div class="text-sm text-gray-500">AI review failed: ${escapeHtml(tool.scrutiny_report.ai_review_error)}</div>`;
} else {
aiReviewSection.classList.add('hidden');
}
// Scrutiny Warnings
const warningsSection = document.getElementById('detail-warnings-section');
const warningsDiv = document.getElementById('detail-warnings');