From 8471480aa7b61f88e3b6b18e8525ab0fd66e7eb7 Mon Sep 17 00:00:00 2001
From: rob
Date: Fri, 16 Jan 2026 16:03:44 -0400
Subject: [PATCH] Add AI-powered secondary review for scrutiny warnings

- Create scrutiny-ai-review tool that uses AI to analyze warnings
- Integrate AI review into publish flow (app.py)
- Integrate AI review into Fabric sync script
- If AI review returns APPROVE with >=80% confidence, auto-approve
- Display AI review results in admin pending tools modal
  - Shows verdict (APPROVE/REJECT/NEEDS_HUMAN_REVIEW) with confidence
  - Shows per-finding analysis (FALSE_POSITIVE/LEGITIMATE_CONCERN)

Co-Authored-By: Claude Opus 4.5
---
Reviewer notes (placed between the "---" line and the diff, so they are not
part of the commit message):

* run_ai_scrutiny_review() is added twice with the same logic: once in
  scripts/fabric_sync.py and once in src/cmdforge/registry/app.py. The app.py
  copy differs only in its "-> Optional[dict]" return annotation and in
  importing subprocess and Path inside the function body.
* The helper returns None when ~/.cmdforge/scrutiny-ai-review/config.yaml does
  not exist, when the report has no finding with result == "warning", when
  the subprocess exits non-zero or prints only whitespace, and on
  TimeoutExpired / JSONDecodeError / FileNotFoundError. Any other exception
  propagates to the caller, where "except Exception" stores str(e) under
  scrutiny_report["ai_review_error"].
* The json.loads() result is returned without a type check. Both callers call
  .get() on it, so a truthy non-dict payload is first stored under
  scrutiny_report["ai_review"] and then fails inside the caller's try block.
* The review only runs when the scrutiny decision is "review". Auto-approval
  requires overall_verdict == "APPROVE" and confidence >= 0.8; it then sets
  "decision" to "approve" and "ai_approved" to True. fabric_sync.py also
  updates its local scrutiny_decision and logs the approval.
* The warnings and the tool config are passed to "cmdforge run" as JSON
  strings in argv, with a 60 second timeout.
* NOTE(review): this copy of the patch was flattened by extraction.
  Indentation and blank context lines below are reconstructed from the hunk
  line counts and Python syntax; runs of spaces inside string literals may
  have been collapsed (e.g. the logger.info f-string). Confirm against the
  tree before applying.
* NOTE(review): the pending.html hunk body is not present in this copy.

 scripts/fabric_sync.py                        | 74 ++++++++++++++++++-
 src/cmdforge/registry/app.py                  | 73 ++++++++++++++++++
 src/cmdforge/web/templates/admin/pending.html | 55 ++++++++++++++
 3 files changed, 201 insertions(+), 1 deletion(-)

diff --git a/scripts/fabric_sync.py b/scripts/fabric_sync.py
index 769a7f3..6b1e32e 100755
--- a/scripts/fabric_sync.py
+++ b/scripts/fabric_sync.py
@@ -58,6 +58,62 @@ DEFAULT_STATE_FILE = DEFAULT_SYNC_DIR / "sync_state.json"
 DEFAULT_PROVIDER = "opencode-pickle"
 
 
+def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str):
+    """Run AI-powered secondary review of scrutiny warnings.
+
+    Uses the scrutiny-ai-review tool to analyze warnings and identify false positives.
+
+    Args:
+        scrutiny_report: The scrutiny report with findings
+        config: The tool configuration dict
+        tool_name: Name of the tool being reviewed
+        description: Tool description
+
+    Returns:
+        AI review result dict, or None if review fails
+    """
+    # Check if the AI review tool exists
+    tool_path = Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml"
+    if not tool_path.exists():
+        return None
+
+    # Extract warnings from scrutiny report
+    warnings = [
+        f for f in scrutiny_report.get("findings", [])
+        if f.get("result") == "warning"
+    ]
+
+    if not warnings:
+        return None
+
+    # Prepare tool config for review
+    tool_config = {
+        "name": tool_name,
+        "description": description or "",
+        "steps": config.get("steps", []),
+        "arguments": config.get("arguments", []),
+    }
+
+    # Run the tool
+    try:
+        result = subprocess.run(
+            [
+                "cmdforge", "run", "scrutiny-ai-review",
+                "--warnings", json.dumps(warnings),
+                "--tool-config", json.dumps(tool_config),
+            ],
+            capture_output=True,
+            text=True,
+            timeout=60,  # 60 second timeout
+        )
+
+        if result.returncode == 0 and result.stdout.strip():
+            return json.loads(result.stdout.strip())
+        return None
+    except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError):
+        return None
+
+
 @dataclass
 class PatternState:
     """State of a single pattern."""
@@ -292,8 +348,24 @@ def publish_to_registry(
     except Exception as e:
         logger.warning(f"Scrutiny failed for {name}: {e}")
 
-    # Check scrutiny decision
+    # Run AI secondary review if there are warnings
     scrutiny_decision = scrutiny_report.get("decision", "review")
+    if scrutiny_decision == "review":
+        try:
+            ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
+            if ai_review:
+                scrutiny_report["ai_review"] = ai_review
+                # Update decision based on AI review
+                if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
+                    scrutiny_report["decision"] = "approve"
+                    scrutiny_report["ai_approved"] = True
+                    scrutiny_decision = "approve"
+                    logger.info(f" AI review approved {name} (confidence: {ai_review.get('confidence', 0):.2f})")
+        except Exception as e:
+            scrutiny_report["ai_review_error"] = str(e)
+            logger.warning(f"AI review failed for {name}: {e}")
+
+    # Check scrutiny decision
     if scrutiny_decision == "reject":
         fail_findings = [f for f in scrutiny_report.get("findings", []) if f.get("result") == "fail"]
         fail_msg = fail_findings[0]["message"] if fail_findings else "quality too low"
diff --git a/src/cmdforge/registry/app.py b/src/cmdforge/registry/app.py
index 28bfca5..feaa69a 100644
--- a/src/cmdforge/registry/app.py
+++ b/src/cmdforge/registry/app.py
@@ -194,6 +194,65 @@ def select_version(versions: List[str], constraint_raw: Optional[str]) -> Option
     return max(filtered, key=lambda item: item[0])[1]
 
 
+def run_ai_scrutiny_review(scrutiny_report: dict, config: dict, tool_name: str, description: str) -> Optional[dict]:
+    """Run AI-powered secondary review of scrutiny warnings.
+
+    Uses the scrutiny-ai-review tool to analyze warnings and identify false positives.
+
+    Args:
+        scrutiny_report: The scrutiny report with findings
+        config: The tool configuration dict
+        tool_name: Name of the tool being reviewed
+        description: Tool description
+
+    Returns:
+        AI review result dict, or None if review fails
+    """
+    import subprocess
+    from pathlib import Path
+
+    # Check if the AI review tool exists
+    tool_path = Path.home() / ".cmdforge" / "scrutiny-ai-review" / "config.yaml"
+    if not tool_path.exists():
+        return None
+
+    # Extract warnings from scrutiny report
+    warnings = [
+        f for f in scrutiny_report.get("findings", [])
+        if f.get("result") == "warning"
+    ]
+
+    if not warnings:
+        return None
+
+    # Prepare tool config for review
+    tool_config = {
+        "name": tool_name,
+        "description": description or "",
+        "steps": config.get("steps", []),
+        "arguments": config.get("arguments", []),
+    }
+
+    # Run the tool
+    try:
+        result = subprocess.run(
+            [
+                "cmdforge", "run", "scrutiny-ai-review",
+                "--warnings", json.dumps(warnings),
+                "--tool-config", json.dumps(tool_config),
+            ],
+            capture_output=True,
+            text=True,
+            timeout=60,  # 60 second timeout
+        )
+
+        if result.returncode == 0 and result.stdout.strip():
+            return json.loads(result.stdout.strip())
+        return None
+    except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError):
+        return None
+
+
 def create_app() -> Flask:
     app = Flask(__name__)
     app.config["MAX_CONTENT_LENGTH"] = MAX_BODY_BYTES
@@ -1787,6 +1846,20 @@ def create_app() -> Flask:
                     details={"scrutiny": scrutiny_report},
                 )
 
+        # Run AI secondary review if there are warnings
+        if scrutiny_report.get("decision") == "review":
+            try:
+                ai_review = run_ai_scrutiny_review(scrutiny_report, config, name, description)
+                if ai_review:
+                    scrutiny_report["ai_review"] = ai_review
+                    # Update decision based on AI review
+                    if ai_review.get("overall_verdict") == "APPROVE" and ai_review.get("confidence", 0) >= 0.8:
+                        scrutiny_report["decision"] = "approve"
+                        scrutiny_report["ai_approved"] = True
+            except Exception as e:
+                # Don't fail publish if AI review fails
+                scrutiny_report["ai_review_error"] = str(e)
+
         if dry_run:
             return jsonify({
                 "data": {
diff --git a/src/cmdforge/web/templates/admin/pending.html b/src/cmdforge/web/templates/admin/pending.html
index 72ccd68..e65c7dd 100644
--- a/src/cmdforge/web/templates/admin/pending.html
+++ b/src/cmdforge/web/templates/admin/pending.html
@@ -153,6 +153,12 @@
Loading...