CascadingDev/agents/researcher.py

210 lines
6.4 KiB
Python

#!/usr/bin/env python3
"""
AI_Researcher agent.
Responds to @AI-researcher mentions by running a research-oriented AI
completion and appending a Markdown summary with cited sources. Designed to be
run as a background (non-blocking) participant.
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path
from textwrap import dedent
# --- Dynamic SDK Loader -----------------------------------------------------
def load_agent_context(repo_root: Path):
    """Locate the CascadingDev SDK and return its ``AgentContext`` class.

    Lookup order:
      1. A copy vendored into ``<repo_root>/.cascadingdev`` (used when that
         directory contains ``cascadingdev/agent``).
      2. A ``src`` directory one level above this file's directory.
      3. Whatever ``cascadingdev`` package is already importable.

    Args:
        repo_root: Root of the repository the agent is operating on.

    Returns:
        The ``AgentContext`` class object (not an instance).

    Raises:
        ImportError: If the SDK cannot be imported from the chosen location.
    """
    bundled_sdk = repo_root / ".cascadingdev"
    if (bundled_sdk / "cascadingdev" / "agent").exists():
        extra_path = bundled_sdk
    else:
        # Only consult this file's own location when no vendored copy exists.
        checkout_src = Path(__file__).resolve().parents[1] / "src"
        extra_path = checkout_src if checkout_src.exists() else None

    if extra_path is not None:
        # Front of sys.path so the selected copy wins over any installed one.
        sys.path.insert(0, str(extra_path))

    from cascadingdev.agent.sdk import AgentContext

    return AgentContext
def load_provider_client(repo_root: Path):
    """Locate the CascadingDev SDK and return its ``ProviderClient`` class.

    The search mirrors the SDK lookup used for the agent context: a vendored
    copy under ``<repo_root>/.cascadingdev`` is preferred, then a ``src``
    directory one level above this file's directory, and finally any
    ``cascadingdev`` package that is already importable.

    Args:
        repo_root: Root of the repository the agent is operating on.

    Returns:
        The ``ProviderClient`` class object (not an instance).

    Raises:
        ImportError: If the SDK cannot be imported from the chosen location.
    """
    vendored = repo_root / ".cascadingdev"
    has_vendored_sdk = (vendored / "cascadingdev" / "agent").exists()

    search_dir = None
    if has_vendored_sdk:
        search_dir = vendored
    else:
        # Evaluated lazily: this file's location matters only without a vendored SDK.
        local_src = Path(__file__).resolve().parents[1] / "src"
        if local_src.exists():
            search_dir = local_src

    if search_dir is not None:
        # Prepend so the selected copy shadows any other installed version.
        sys.path.insert(0, str(search_dir))

    from cascadingdev.agent.providers import ProviderClient

    return ProviderClient
# --- Agent Behaviour --------------------------------------------------------
# Matches a mention of the researcher ("@ai-researcher" or "@ai_researcher",
# case-insensitive), an optional colon, and captures the text that follows as
# group 1. DOTALL is not set, so the captured text never spans a newline; the
# `\s*` before it can, so a request may start on the line after the mention.
MENTION_REGEX = re.compile(r"@ai[-_]researcher\s*:?\s*(.+)", re.IGNORECASE)
# HTML-comment markers that delimit a comment written by this agent.
# format_comment() emits them and already_addressed() searches for them.
BLOCK_START = "<!-- AUTO:RESEARCHER START -->"
BLOCK_END = "<!-- AUTO:RESEARCHER END -->"
# Instruction text placed at the top of every prompt by build_prompt(). It asks
# the model for a JSON object with "summary" and "sources" keys, which main()
# reads back. dedent() removes common leading indentation and strip() removes
# the blank lines surrounding the literal.
PRIMER = dedent(
"""
You are AI_Researcher, a diligent technical researcher with reliable access
to current information.
Requirements:
- Interpret the request and search for authoritative, up-to-date sources.
- Produce a concise Markdown summary (2-4 bullet points) tailored to the
team's needs.
- List 2-3 credible sources with titles and URLs. Include a short insight
for each source.
- If no definitive answer exists, explain the gaps and suggest next steps.
- Do NOT include "Name:" or "VOTE:" lines. Just return the research content.
Respond with JSON:
{
"summary": "<markdown bullets>",
"sources": [
{"title": "...", "url": "...", "insight": "..."},
...
]
}
"""
).strip()
def build_prompt(query: str, discussion_text: str) -> str:
    """Assemble the full prompt sent to the provider.

    The prompt consists of four sections separated by blank lines: the
    researcher primer, the research request, the discussion text offered as
    disambiguating context, and a closing instruction to emit the JSON object.

    Args:
        query: The research request extracted from the mention.
        discussion_text: Contents of the discussion file.

    Returns:
        The prompt as a single string.
    """
    sections = [
        PRIMER,
        f"Research request: {query}",
        # The context heading and the discussion body share one section,
        # separated by a single newline rather than a blank line.
        f"Recent discussion context (may help disambiguate terms):\n{discussion_text}",
        "Return the JSON object now.",
    ]
    return "\n\n".join(sections)
def extract_query(discussion_text: str) -> str | None:
    """Return the request text of the first researcher mention, if any.

    Mentions whose captured text is empty after trimming whitespace are
    skipped.

    Args:
        discussion_text: Contents of the discussion file.

    Returns:
        The trimmed request text of the earliest usable mention, or ``None``
        when the text contains no mention with a non-empty request.
    """
    trimmed_requests = (
        found.group(1).strip() for found in MENTION_REGEX.finditer(discussion_text)
    )
    return next((text for text in trimmed_requests if text), None)
def already_addressed(discussion_text: str, query: str) -> bool:
    """Report whether an existing researcher block already answers *query*.

    Each researcher block records the request it answers on an
    ``_Original request:_`` line. When a block has such a line, the query is
    considered addressed only if it equals the recorded request (ignoring
    case and surrounding whitespace). A plain substring test over the whole
    block is NOT used in that case, because it would treat a short query such
    as "Redis" as answered whenever an earlier summary merely mentions the
    word. Blocks without a request line fall back to the substring test so
    hand-written or older blocks keep working.

    Args:
        discussion_text: Contents of the discussion file.
        query: The research request to look for.

    Returns:
        ``True`` if a complete researcher block answers the query, otherwise
        ``False``. A block with a start marker but no end marker is ignored.
    """
    wanted = query.strip().lower()
    request_marker = "_original request:_"  # lower-cased form of the recorded-request prefix
    cursor = 0
    while True:
        start_idx = discussion_text.find(BLOCK_START, cursor)
        if start_idx == -1:
            return False
        end_idx = discussion_text.find(BLOCK_END, start_idx)
        if end_idx == -1:
            # Unterminated block: nothing further can be a complete answer.
            return False

        block_text = discussion_text[start_idx:end_idx].lower()
        recorded_requests = [
            line.strip()[len(request_marker):].strip()
            for line in block_text.splitlines()
            if line.strip().startswith(request_marker)
        ]
        if recorded_requests:
            if wanted in recorded_requests:
                return True
        elif wanted in block_text:
            # No recorded request line: keep the permissive substring match.
            return True

        cursor = end_idx + len(BLOCK_END)
def format_comment(summary: str, sources: list[dict[str, str]], query: str) -> str:
    """Render the researcher's findings as a delimited Markdown comment.

    The comment is wrapped in the researcher block markers and contains a
    "Research Findings" section, an optional "Sources" list and a trailing
    line recording the original request.

    Source entries come from model output, so they are handled defensively:
    missing or ``None`` fields become empty strings, non-string field values
    are converted with ``str()``, a bare string entry is shown as an unlinked
    bullet, and any other non-dict entry is skipped. Entries with no title,
    URL or insight are dropped.

    Args:
        summary: Markdown summary; a placeholder sentence is used when blank.
        sources: Source entries, normally dicts with ``title``, ``url`` and
            ``insight`` keys.
        query: The original research request.

    Returns:
        The comment text, ending with a newline after the end marker.
    """

    def _clean(value: object) -> str:
        # JSON null and non-string values must not reach str.strip() unguarded.
        return "" if value is None else str(value).strip()

    findings = (summary or "").strip() or (
        "Could not gather substantive information yet; consider refining the request."
    )
    lines: list[str] = [BLOCK_START, "Name: AI_Researcher", "### Research Findings", findings]

    normalized_sources: list[tuple[str, str, str]] = []
    for src in sources:
        if isinstance(src, dict):
            entry = (_clean(src.get("title")), _clean(src.get("url")), _clean(src.get("insight")))
        elif isinstance(src, str):
            entry = (src.strip(), "", "")
        else:
            continue
        if any(entry):
            normalized_sources.append(entry)

    if normalized_sources:
        lines.append("")
        lines.append("### Sources")
        for title, url, insight in normalized_sources:
            descriptor = title or url or "Reference"
            bullet = f"- [{descriptor}]({url})" if url else f"- {descriptor}"
            if insight:
                bullet += f": {insight}"
            lines.append(bullet)

    lines.append("")
    lines.append(f"_Original request:_ {query}")
    lines.append(BLOCK_END)
    lines.append("")  # yields the trailing newline after the end marker
    return "\n".join(lines)
def _researcher_block_spans(discussion_text: str) -> list[tuple[int, int]]:
    """Return ``(start, end)`` offsets of every complete researcher block."""
    spans: list[tuple[int, int]] = []
    cursor = 0
    while True:
        start = discussion_text.find(BLOCK_START, cursor)
        if start == -1:
            return spans
        end = discussion_text.find(BLOCK_END, start)
        if end == -1:
            return spans
        spans.append((start, end))
        cursor = end + len(BLOCK_END)


def _first_unaddressed_query(discussion_text: str) -> str | None:
    """Return the earliest mention request that has no research block yet."""
    own_blocks = _researcher_block_spans(discussion_text)
    for mention in MENTION_REGEX.finditer(discussion_text):
        # Never react to text inside this agent's own output blocks.
        if any(start <= mention.start() < end for start, end in own_blocks):
            continue
        candidate = mention.group(1).strip()
        if candidate and not already_addressed(discussion_text, candidate):
            return candidate
    return None


def main() -> int:
    """Run the researcher agent once against a discussion file.

    Reads the discussion named by ``--path`` (relative to ``--repo-root``),
    picks the earliest researcher mention that has not been answered yet,
    asks the provider for a structured answer and appends the formatted
    result to the discussion. At most one request is answered per run.

    Every mention is considered, not only the first one in the file.
    Otherwise, once the first mention has been answered, later mentions would
    never be picked up.

    Returns:
        Always ``0``; having nothing to do is not an error.
    """
    parser = argparse.ArgumentParser(description="AI Researcher agent")
    parser.add_argument("--repo-root", required=True, help="Repository root path")
    parser.add_argument("--path", required=True, help="Relative path to discussion file")
    args = parser.parse_args()

    repo_root = Path(args.repo_root).resolve()
    discussion_rel = Path(args.path)

    AgentContext = load_agent_context(repo_root)
    ProviderClient = load_provider_client(repo_root)

    context = AgentContext(repo_root, discussion_rel)
    discussion_text = context.read_text()
    if not discussion_text.strip():
        return 0

    query = _first_unaddressed_query(discussion_text)
    if query is None:
        return 0

    provider = ProviderClient(repo_root)
    try:
        response = provider.structured(build_prompt(query, discussion_text), model_hint="quality")
    except Exception as exc:
        # Best effort: a provider failure must not crash the background agent,
        # but it should leave a trace for whoever inspects the logs.
        # NOTE(review): the notice below is appended as a regular researcher
        # block, so this request then counts as addressed and is not retried
        # automatically -- confirm that is intended.
        print(f"AI_Researcher: provider request failed: {exc}", file=sys.stderr)
        response = {"summary": "Unable to fetch research results right now; please retry later.", "sources": []}

    if not isinstance(response, dict):
        # Anything other than a JSON object is treated as "no findings".
        response = {}

    # `or ""` keeps a JSON null from being rendered as the text "None".
    summary = str(response.get("summary") or "").strip()

    sources = response.get("sources") or []
    if isinstance(sources, str):
        # Some models return the list JSON-encoded inside a string.
        try:
            sources = json.loads(sources)
        except json.JSONDecodeError:
            sources = []
    if not isinstance(sources, list):
        sources = []

    comment_block = format_comment(summary, sources, query)
    context.append_block(comment_block)
    # Service agents run in the background; leave changes unstaged for follow-up commits.
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())