Add AI-powered secondary review for scrutiny warnings
- Create scrutiny-ai-review tool that uses AI to analyze warnings - Integrate AI review into publish flow (app.py) - Integrate AI review into Fabric sync script - If AI review returns APPROVE with >=80% confidence, auto-approve - Display AI review results in admin pending tools modal - Shows verdict (APPROVE/REJECT/NEEDS_HUMAN_REVIEW) with confidence - Shows per-finding analysis (FALSE_POSITIVE/LEGITIMATE_CONCERN) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
b0bd692be5
commit
8471480aa7
|
|
@ -58,6 +58,62 @@ DEFAULT_STATE_FILE = DEFAULT_SYNC_DIR / "sync_state.json"
|
|||
DEFAULT_PROVIDER = "opencode-pickle"
|
||||
|
||||
|
||||
def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str) -> "dict | None":
    """Run AI-powered secondary review of scrutiny warnings.

    Uses the scrutiny-ai-review tool to analyze warnings and identify false positives.

    Args:
        scrutiny_report: The scrutiny report; warning-level entries are read
            from its ``"findings"`` list.
        config: The tool configuration dict; ``"steps"`` and ``"arguments"``
            are forwarded to the reviewer.
        tool_name: Name of the tool being reviewed
        description: Tool description (may be empty or None)

    Returns:
        AI review result dict, or None if the review tool is not installed,
        there are no warnings to review, or the review fails for any reason.
    """
    # The review is strictly best-effort: bail out quietly if the AI review
    # tool has not been installed for this user.
    tool_path = Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml"
    if not tool_path.exists():
        return None

    # Only "warning" findings need a second opinion; hard failures are
    # handled by the normal scrutiny decision flow.
    warnings = [
        f for f in scrutiny_report.get("findings", [])
        if f.get("result") == "warning"
    ]
    if not warnings:
        return None

    # Minimal snapshot of the tool under review, serialized for the reviewer.
    tool_config = {
        "name": tool_name,
        "description": description or "",
        "steps": config.get("steps", []),
        "arguments": config.get("arguments", []),
    }

    # Run the reviewer as a subprocess and parse its JSON verdict.
    try:
        result = subprocess.run(
            [
                "cmdforge", "run", "scrutiny-ai-review",
                "--warnings", json.dumps(warnings),
                "--tool-config", json.dumps(tool_config),
            ],
            capture_output=True,
            text=True,
            timeout=60,  # don't let a hung reviewer block the sync
        )
        output = result.stdout.strip()
        if result.returncode == 0 and output:
            parsed = json.loads(output)
            # Callers treat the result as a mapping (.get(...)), so reject
            # any non-dict JSON top-level value instead of propagating it.
            return parsed if isinstance(parsed, dict) else None
        return None
    except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError):
        # OSError covers FileNotFoundError (cmdforge missing) and similar
        # launch failures; AI review is advisory, so degrade to "no review".
        return None
|
||||
|
||||
|
||||
@dataclass
|
||||
class PatternState:
|
||||
"""State of a single pattern."""
|
||||
|
|
@ -292,8 +348,24 @@ def publish_to_registry(
|
|||
except Exception as e:
|
||||
logger.warning(f"Scrutiny failed for {name}: {e}")
|
||||
|
||||
# Check scrutiny decision
|
||||
# Run AI secondary review if there are warnings
|
||||
scrutiny_decision = scrutiny_report.get("decision", "review")
|
||||
if scrutiny_decision == "review":
|
||||
try:
|
||||
ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
|
||||
if ai_review:
|
||||
scrutiny_report["ai_review"] = ai_review
|
||||
# Update decision based on AI review
|
||||
if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
|
||||
scrutiny_report["decision"] = "approve"
|
||||
scrutiny_report["ai_approved"] = True
|
||||
scrutiny_decision = "approve"
|
||||
logger.info(f" AI review approved {name} (confidence: {ai_review.get('confidence', 0):.2f})")
|
||||
except Exception as e:
|
||||
scrutiny_report["ai_review_error"] = str(e)
|
||||
logger.warning(f"AI review failed for {name}: {e}")
|
||||
|
||||
# Check scrutiny decision
|
||||
if scrutiny_decision == "reject":
|
||||
fail_findings = [f for f in scrutiny_report.get("findings", []) if f.get("result") == "fail"]
|
||||
fail_msg = fail_findings[0]["message"] if fail_findings else "quality too low"
|
||||
|
|
|
|||
|
|
@ -194,6 +194,65 @@ def select_version(versions: List[str], constraint_raw: Optional[str]) -> Option
|
|||
return max(filtered, key=lambda item: item[0])[1]
|
||||
|
||||
|
||||
def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str) -> Optional[dict]:
    """Run AI-powered secondary review of scrutiny warnings.

    Delegates to the locally installed ``scrutiny-ai-review`` tool, which
    analyzes warning-level findings and identifies false positives.

    Args:
        scrutiny_report: The scrutiny report with findings
        config: The tool configuration dict
        tool_name: Name of the tool being reviewed
        description: Tool description

    Returns:
        AI review result dict, or None if review fails
    """
    import subprocess
    from pathlib import Path

    # Review is optional: skip silently when the tool is not installed.
    review_tool_config = Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml"
    if not review_tool_config.exists():
        return None

    # Only warning-level findings are worth a second opinion.
    warning_findings = []
    for finding in scrutiny_report.get("findings", []):
        if finding.get("result") == "warning":
            warning_findings.append(finding)
    if not warning_findings:
        return None

    # Snapshot of the tool under review, handed to the reviewer as JSON.
    reviewed_tool = {
        "name": tool_name,
        "description": description or "",
        "steps": config.get("steps", []),
        "arguments": config.get("arguments", []),
    }

    command = [
        "cmdforge", "run", "scrutiny-ai-review",
        "--warnings", json.dumps(warning_findings),
        "--tool-config", json.dumps(reviewed_tool),
    ]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=60,  # cap the reviewer at one minute
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None

    if completed.returncode != 0 or not completed.stdout.strip():
        return None
    try:
        return json.loads(completed.stdout.strip())
    except json.JSONDecodeError:
        return None
|
||||
|
||||
|
||||
def create_app() -> Flask:
|
||||
app = Flask(__name__)
|
||||
app.config["MAX_CONTENT_LENGTH"] = MAX_BODY_BYTES
|
||||
|
|
@ -1787,6 +1846,20 @@ def create_app() -> Flask:
|
|||
details={"scrutiny": scrutiny_report},
|
||||
)
|
||||
|
||||
# Run AI secondary review if there are warnings
|
||||
if scrutiny_report.get("decision") == "review":
|
||||
try:
|
||||
ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
|
||||
if ai_review:
|
||||
scrutiny_report["ai_review"] = ai_review
|
||||
# Update decision based on AI review
|
||||
if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
|
||||
scrutiny_report["decision"] = "approve"
|
||||
scrutiny_report["ai_approved"] = True
|
||||
except Exception as e:
|
||||
# Don't fail publish if AI review fails
|
||||
scrutiny_report["ai_review_error"] = str(e)
|
||||
|
||||
if dry_run:
|
||||
return jsonify({
|
||||
"data": {
|
||||
|
|
|
|||
|
|
@ -153,6 +153,12 @@
|
|||
<div class="p-6 overflow-y-auto flex-grow">
|
||||
<div id="detail-loading" class="py-8 text-center text-gray-500">Loading...</div>
|
||||
<div id="detail-content" class="hidden">
|
||||
<!-- AI Review (if available) -->
|
||||
<div id="detail-ai-review-section" class="mb-6 hidden">
|
||||
<h4 class="text-sm font-medium text-gray-700 mb-2">AI Review</h4>
|
||||
<div id="detail-ai-review" class="rounded-md p-4"></div>
|
||||
</div>
|
||||
|
||||
<!-- Scrutiny Warnings -->
|
||||
<div id="detail-warnings-section" class="mb-6 hidden">
|
||||
<h4 class="text-sm font-medium text-gray-700 mb-2">Scrutiny Warnings</h4>
|
||||
|
|
@ -239,6 +245,55 @@ async function viewTool(toolId) {
|
|||
// Update title
|
||||
document.getElementById('detail-title').textContent = `${tool.owner}/${tool.name} v${tool.version}`;
|
||||
|
||||
// AI Review (if available)
|
||||
const aiReviewSection = document.getElementById('detail-ai-review-section');
|
||||
const aiReviewDiv = document.getElementById('detail-ai-review');
|
||||
const aiReview = tool.scrutiny_report?.ai_review;
|
||||
if (aiReview) {
|
||||
aiReviewSection.classList.remove('hidden');
|
||||
const verdict = aiReview.overall_verdict || 'UNKNOWN';
|
||||
const confidence = (aiReview.confidence * 100).toFixed(0) || '?';
|
||||
let verdictColor, verdictBg;
|
||||
if (verdict === 'APPROVE') {
|
||||
verdictColor = 'text-green-800';
|
||||
verdictBg = 'bg-green-50 border-green-200';
|
||||
} else if (verdict === 'REJECT') {
|
||||
verdictColor = 'text-red-800';
|
||||
verdictBg = 'bg-red-50 border-red-200';
|
||||
} else {
|
||||
verdictColor = 'text-yellow-800';
|
||||
verdictBg = 'bg-yellow-50 border-yellow-200';
|
||||
}
|
||||
|
||||
let findingsHtml = '';
|
||||
if (aiReview.findings && aiReview.findings.length > 0) {
|
||||
findingsHtml = '<div class="mt-3 space-y-2">' + aiReview.findings.map(f => {
|
||||
let fColor = f.verdict === 'FALSE_POSITIVE' ? 'text-green-700' :
|
||||
f.verdict === 'LEGITIMATE_CONCERN' ? 'text-red-700' : 'text-yellow-700';
|
||||
return `<div class="text-sm border-l-2 pl-3 ${f.verdict === 'FALSE_POSITIVE' ? 'border-green-400' : f.verdict === 'LEGITIMATE_CONCERN' ? 'border-red-400' : 'border-yellow-400'}">
|
||||
<div class="font-medium ${fColor}">${escapeHtml(f.verdict || 'UNKNOWN')}</div>
|
||||
<div class="text-gray-600">${escapeHtml(f.explanation || '')}</div>
|
||||
</div>`;
|
||||
}).join('') + '</div>';
|
||||
}
|
||||
|
||||
aiReviewDiv.className = `rounded-md p-4 border ${verdictBg}`;
|
||||
aiReviewDiv.innerHTML = `
|
||||
<div class="flex items-center justify-between mb-2">
|
||||
<span class="font-medium ${verdictColor}">${verdict}</span>
|
||||
<span class="text-sm text-gray-500">${confidence}% confidence</span>
|
||||
</div>
|
||||
<div class="text-sm text-gray-700">${escapeHtml(aiReview.summary || '')}</div>
|
||||
${findingsHtml}
|
||||
`;
|
||||
} else if (tool.scrutiny_report?.ai_review_error) {
|
||||
aiReviewSection.classList.remove('hidden');
|
||||
aiReviewDiv.className = 'rounded-md p-4 border bg-gray-50 border-gray-200';
|
||||
aiReviewDiv.innerHTML = `<div class="text-sm text-gray-500">AI review failed: ${escapeHtml(tool.scrutiny_report.ai_review_error)}</div>`;
|
||||
} else {
|
||||
aiReviewSection.classList.add('hidden');
|
||||
}
|
||||
|
||||
// Scrutiny Warnings
|
||||
const warningsSection = document.getElementById('detail-warnings-section');
|
||||
const warningsDiv = document.getElementById('detail-warnings');
|
||||
|
|
|
|||
Loading…
Reference in New Issue