# CascadingDev/agents/researcher.py

#!/usr/bin/env python3
"""
AI_Researcher agent.

Responds to @AI-researcher mentions by running a research-oriented AI
completion and appending a Markdown summary with cited sources. Designed to be
run as a background (non-blocking) participant.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path
from textwrap import dedent

# --- Dynamic SDK Loader -----------------------------------------------------


def load_agent_context(repo_root: Path):
    """Import and return the ``AgentContext`` class from the CascadingDev SDK.

    Lookup order for the ``cascadingdev`` package:

    1. A copy vendored under ``<repo_root>/.cascadingdev`` (detected by the
       presence of ``cascadingdev/agent`` inside it).
    2. The ``src`` directory one level above this script's directory.
    3. Whatever is already importable on ``sys.path``.

    The first location that exists is pushed to the front of ``sys.path``
    before the import is attempted.

    Args:
        repo_root: Root of the repository the agent is operating on.

    Returns:
        The ``AgentContext`` class object (not an instance).

    Raises:
        ImportError: If ``cascadingdev.agent.sdk`` cannot be imported.
    """
    vendored = repo_root / ".cascadingdev"
    if (vendored / "cascadingdev" / "agent").exists():
        extra_path = vendored
    else:
        # Only inspect the script's own checkout when nothing is vendored.
        checkout_src = Path(__file__).resolve().parents[1] / "src"
        extra_path = checkout_src if checkout_src.exists() else None

    if extra_path is not None:
        sys.path.insert(0, str(extra_path))

    from cascadingdev.agent.sdk import AgentContext

    return AgentContext


def load_provider_client(repo_root: Path):
    """Import and return the ``ProviderClient`` class from the CascadingDev SDK.

    The ``cascadingdev`` package is searched for in this order: a vendored
    copy under ``<repo_root>/.cascadingdev`` (recognised by its
    ``cascadingdev/agent`` directory), then the ``src`` directory one level
    above this script's directory, and finally the existing ``sys.path``.
    The first of the two directories that exists is inserted at the front of
    ``sys.path`` before importing.

    Args:
        repo_root: Root of the repository the agent is operating on.

    Returns:
        The ``ProviderClient`` class object (not an instance).

    Raises:
        ImportError: If ``cascadingdev.agent.providers`` cannot be imported.
    """
    search_dir = None

    vendor_dir = repo_root / ".cascadingdev"
    if (vendor_dir / "cascadingdev" / "agent").exists():
        search_dir = vendor_dir
    else:
        # Fall back to the source tree that ships alongside this script.
        local_src = Path(__file__).resolve().parents[1] / "src"
        if local_src.exists():
            search_dir = local_src

    if search_dir is not None:
        sys.path.insert(0, str(search_dir))

    from cascadingdev.agent.providers import ProviderClient

    return ProviderClient


# --- Agent Behaviour --------------------------------------------------------

# Matches "@ai-researcher" or "@ai_researcher" (any letter case), optionally
# followed by a colon, and captures the remainder of the line as the request.
# NOTE: "\s*" also matches newlines, so a mention with nothing after it on its
# own line captures the next non-blank line instead.
MENTION_REGEX = re.compile(r"@ai[-_]researcher\s*:?\s*(.+)", re.IGNORECASE)
# HTML-comment markers that delimit a block written by this agent.
# format_comment() wraps its output in them and already_addressed() scans the
# text between them.
BLOCK_START = "<!-- AUTO:RESEARCHER START -->"
BLOCK_END = "<!-- AUTO:RESEARCHER END -->"

# Fixed instructions placed at the top of every prompt by build_prompt().
# dedent() removes the literal's common indentation and strip() drops the
# leading/trailing blank lines introduced by the triple-quoted layout.
PRIMER = dedent(
    """
    You are AI_Researcher, a diligent technical researcher with reliable access
    to current information.

    Requirements:
    - Interpret the request and search for authoritative, up-to-date sources.
    - Produce a concise Markdown summary (2-4 bullet points) tailored to the
      team's needs.
    - List 2-3 credible sources with titles and URLs. Include a short insight
      for each source.
    - If no definitive answer exists, explain the gaps and suggest next steps.
    - Do NOT include "Name:" or "VOTE:" lines. Just return the research content.

    Respond with JSON:
    {
      "summary": "<markdown bullets>",
      "sources": [
        {"title": "...", "url": "...", "insight": "..."},
        ...
      ]
    }
    """
).strip()


def build_prompt(query: str, discussion_text: str) -> str:
    """Assemble the full prompt sent to the provider.

    The prompt consists of four sections separated by blank lines: the fixed
    ``PRIMER`` instructions, the research request, the discussion text as
    context, and a closing instruction to return the JSON object.

    Args:
        query: The research request extracted from the mention.
        discussion_text: Discussion content included verbatim as context.

    Returns:
        The prompt string.
    """
    sections = [
        f"{PRIMER}",
        f"Research request: {query}",
        f"Recent discussion context (may help disambiguate terms):\n{discussion_text}",
        "Return the JSON object now.",
    ]
    return "\n\n".join(sections)


def extract_query(discussion_text: str) -> str | None:
    """Return the request text of the first researcher mention, if any.

    Every ``MENTION_REGEX`` match is considered in document order; the
    captured text is stripped of surrounding whitespace and the first
    non-empty result wins.

    Args:
        discussion_text: Full text of the discussion to scan.

    Returns:
        The stripped request text, or ``None`` when no mention carries one.
    """
    requests = (
        found.group(1).strip() for found in MENTION_REGEX.finditer(discussion_text)
    )
    return next((text for text in requests if text), None)


def already_addressed(discussion_text: str, query: str) -> bool:
    """Report whether an existing researcher block already answers ``query``.

    Each region between ``BLOCK_START`` and ``BLOCK_END`` is inspected. A
    block written by ``format_comment`` ends with a footer line of the form
    ``_Original request:_ <query>``; when a block has such a footer, the
    query must equal the recorded request (ignoring letter case and
    whitespace differences). Matching on the footer rather than on the whole
    block avoids treating a new request as answered merely because its words
    occur somewhere in an earlier summary or in a longer earlier request.

    Blocks without any footer line fall back to a case-insensitive substring
    search over the block text.

    Args:
        discussion_text: Full text of the discussion.
        query: The request text to look for.

    Returns:
        ``True`` if some complete block records this request, else ``False``.
        An unterminated block (start marker without end marker) is ignored.
    """
    footer_prefix = "_original request:_"
    wanted = " ".join(query.lower().split())
    lower_query = query.lower()

    cursor = 0
    while True:
        start_idx = discussion_text.find(BLOCK_START, cursor)
        if start_idx == -1:
            return False
        end_idx = discussion_text.find(BLOCK_END, start_idx)
        if end_idx == -1:
            return False

        block_text = discussion_text[start_idx:end_idx].lower()
        # Collect the request(s) this block says it answered.
        recorded = [
            " ".join(line.strip()[len(footer_prefix):].split())
            for line in block_text.splitlines()
            if line.strip().startswith(footer_prefix)
        ]
        if recorded:
            if wanted in recorded:
                return True
        elif lower_query in block_text:
            # No footer to compare against: keep the permissive check.
            return True

        cursor = end_idx + len(BLOCK_END)


def format_comment(summary: str, sources: list[dict[str, str]], query: str) -> str:
    """Render research results as a marker-delimited Markdown block.

    The block starts with ``BLOCK_START`` and a ``Name: AI_Researcher`` line,
    followed by a "Research Findings" section, an optional "Sources" list,
    an ``_Original request:_`` footer echoing ``query``, and ``BLOCK_END``.
    The returned text ends with a newline.

    Because the inputs come from model-generated JSON, they are handled
    defensively: ``None`` and non-string field values are coerced to text,
    a source given as a bare string is treated as its URL, and entries that
    are neither a mapping nor a string (or that carry no usable text) are
    skipped instead of raising.

    Args:
        summary: Markdown summary; a placeholder message is used when empty.
        sources: Source entries with optional ``title``, ``url`` and
            ``insight`` keys.
        query: The original request, echoed in the footer.

    Returns:
        The complete comment block as a single string.
    """

    def _text(value: object) -> str:
        # JSON null or numeric fields must not crash rendering.
        return "" if value is None else str(value).strip()

    findings = _text(summary) or (
        "Could not gather substantive information yet; consider refining the request."
    )
    lines: list[str] = [
        BLOCK_START,
        "Name: AI_Researcher",
        "### Research Findings",
        findings,
    ]

    normalized_sources: list[tuple[str, str, str]] = []
    for src in sources or []:
        if isinstance(src, str):
            # Models sometimes return a plain list of URLs.
            src = {"url": src}
        if not isinstance(src, dict):
            continue
        entry = (_text(src.get("title")), _text(src.get("url")), _text(src.get("insight")))
        if any(entry):
            normalized_sources.append(entry)

    if normalized_sources:
        lines.append("")
        lines.append("### Sources")
        for title, url, insight in normalized_sources:
            descriptor = title or url or "Reference"
            bullet = f"- [{descriptor}]({url})" if url else f"- {descriptor}"
            if insight:
                bullet += f": {insight}"
            lines.append(bullet)

    lines.append("")
    # already_addressed() relies on this footer to recognise answered requests.
    lines.append(f"_Original request:_ {query}")
    lines.append(BLOCK_END)
    lines.append("")
    return "\n".join(lines)


def _researcher_block_spans(text: str) -> list[tuple[int, int]]:
    """Return ``(start, end)`` offsets of each complete researcher block."""
    spans: list[tuple[int, int]] = []
    cursor = 0
    while True:
        start = text.find(BLOCK_START, cursor)
        if start == -1:
            break
        end = text.find(BLOCK_END, start)
        if end == -1:
            break
        spans.append((start, end))
        cursor = end + len(BLOCK_END)
    return spans


def _first_pending_query(discussion_text: str) -> str | None:
    """Return the earliest mentioned request that has not been answered yet."""
    block_spans = _researcher_block_spans(discussion_text)
    for match in MENTION_REGEX.finditer(discussion_text):
        # Never react to mentions that appear inside this agent's own output.
        if any(start <= match.start() < end for start, end in block_spans):
            continue
        candidate = match.group(1).strip()
        if candidate and not already_addressed(discussion_text, candidate):
            return candidate
    return None


def main() -> int:
    """Command-line entry point for the researcher agent.

    Reads the discussion identified by ``--repo-root`` and ``--path``, finds
    the first researcher mention whose request has no answer block yet,
    asks the provider for a structured completion, and appends the formatted
    result via ``context.append_block``.

    Mentions are examined in document order, so a discussion that already
    contains an answered request still gets later requests handled. At most
    one request is answered per invocation.

    Provider failures are reported on stderr and replaced by a short
    "retry later" comment; malformed provider output (non-dict response,
    null summary, non-list sources) degrades to empty values rather than
    raising.

    Returns:
        Always ``0``; the agent is a best-effort background participant.
    """
    parser = argparse.ArgumentParser(description="AI Researcher agent")
    parser.add_argument("--repo-root", required=True, help="Repository root path")
    parser.add_argument("--path", required=True, help="Relative path to discussion file")
    args = parser.parse_args()

    repo_root = Path(args.repo_root).resolve()
    discussion_rel = Path(args.path)

    AgentContext = load_agent_context(repo_root)
    ProviderClient = load_provider_client(repo_root)

    context = AgentContext(repo_root, discussion_rel)
    discussion_text = context.read_text()
    if not discussion_text.strip():
        return 0

    query = _first_pending_query(discussion_text)
    if query is None:
        return 0

    provider = ProviderClient(repo_root)
    try:
        response = provider.structured(build_prompt(query, discussion_text), model_hint="quality")
    except Exception as exc:
        # Deliberately broad: any provider failure becomes a visible note in
        # the discussion, but the cause is surfaced for whoever runs the agent.
        print(f"AI_Researcher: provider request failed: {exc!r}", file=sys.stderr)
        response = {"summary": "Unable to fetch research results right now; please retry later.", "sources": []}
    if not isinstance(response, dict):
        # NOTE(review): structured() is presumed to return a dict; guard anyway.
        response = {}

    # "or ''" keeps a JSON null summary from being rendered as the text "None".
    summary = str(response.get("summary") or "").strip()
    sources = response.get("sources") or []
    if isinstance(sources, str):
        # Some models return the list JSON-encoded inside a string.
        try:
            sources = json.loads(sources)
        except json.JSONDecodeError:
            sources = []
    if not isinstance(sources, list):
        sources = []

    comment_block = format_comment(summary, sources, query)
    context.append_block(comment_block)
    # Service agents run in the background; leave changes unstaged for follow-up commits.
    return 0


if __name__ == "__main__":
    # Hand main()'s integer status to the interpreter as the process exit code.
    sys.exit(main())