feat: parse codex exec json output for fallbacks
parent e9cc23685d
commit 0e83b6cf88
@@ -13,6 +13,9 @@ import subprocess
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Any
+
+import json
 
 from automation.ai_config import (
     DEFAULT_COMMAND_CHAIN,
@@ -32,6 +35,7 @@ class ModelConfig:
     """Configuration for invoking the AI model command."""
     commands: list[str]
     sentinel: str
+    runner_settings: Any | None = None  # RunnerSettings from ai_config
 
     @classmethod
     def from_sources(cls, repo_root: Path, override: str | None = None) -> "ModelConfig":
@@ -54,7 +58,23 @@ class ModelConfig:
         if not commands:
             commands = settings.runner.command_chain or DEFAULT_COMMAND_CHAIN.copy()
 
-        return cls(commands=commands, sentinel=sentinel)
+        return cls(commands=commands, sentinel=sentinel, runner_settings=settings.runner)
+
+    def get_commands_for_hint(self, hint: str) -> list[str]:
+        """
+        Get the appropriate command chain based on the model hint.
+
+        Args:
+            hint: 'fast', 'quality', or empty string
+
+        Returns:
+            List of commands to try
+        """
+        if self.runner_settings:
+            hint_commands = self.runner_settings.get_chain_for_hint(hint)
+            if hint_commands:
+                return hint_commands
+        return self.commands
 
 
 def generate_output(
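For context, a self-contained sketch of the hint resolution added above. `FakeRunnerSettings`, `ModelStub`, the command strings, and the chains are illustrative stand-ins, not the real `RunnerSettings` from `ai_config`; `ModelStub` mirrors only the fields and method that matter here:

```python
from dataclasses import dataclass, field


@dataclass
class FakeRunnerSettings:
    chains: dict[str, list[str]] = field(default_factory=dict)

    def get_chain_for_hint(self, hint: str) -> list[str]:
        # Mirrors the interface the diff relies on: empty list means "no chain configured".
        return self.chains.get(hint, [])


@dataclass
class ModelStub:
    commands: list[str]
    runner_settings: FakeRunnerSettings | None = None

    def get_commands_for_hint(self, hint: str) -> list[str]:
        # Same logic as the diff: a configured hint chain wins, else the default chain.
        if self.runner_settings:
            hint_commands = self.runner_settings.get_chain_for_hint(hint)
            if hint_commands:
                return hint_commands
        return self.commands


model = ModelStub(
    commands=["claude -p"],  # illustrative default chain
    runner_settings=FakeRunnerSettings(chains={"fast": ["codex", "claude -p"]}),
)
assert model.get_commands_for_hint("fast") == ["codex", "claude -p"]
assert model.get_commands_for_hint("quality") == ["claude -p"]  # no chain -> default
assert model.get_commands_for_hint("") == ["claude -p"]
```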
@@ -64,6 +84,7 @@ def generate_output(
     source_rel: Path,
     output_rel: Path,
     instruction: str,
+    model_hint: str = "",
 ) -> None:
     """
     Generates or refreshes an output artifact using AI, based on staged changes
@@ -82,6 +103,7 @@ def generate_output(
         source_rel: The path to the source file relative to repo_root.
         output_rel: The path to the output file relative to repo_root.
         instruction: The instruction for the AI model.
+        model_hint: Optional hint ('fast' or 'quality') to guide model selection.
 
     Raises:
         PatchGenerationError: If AI output is empty or patch application fails.
@@ -113,10 +135,11 @@ def generate_output(
         output_content=output_preimage,
         instruction=instruction,
         no_change_token=model.sentinel,
+        model_hint=model_hint,
     )
 
     # Call the AI model and get its raw output.
-    raw_patch, no_changes = call_model(model, prompt, cwd=repo_root)
+    raw_patch, no_changes = call_model(model, prompt, model_hint, cwd=repo_root)
     if no_changes:
         return
 
@@ -299,7 +322,7 @@ PROMPT_TEMPLATE = """You are assisting with automated artifact generation during
 
 SOURCE FILE: {source_path}
 OUTPUT FILE: {output_path}
-
+{model_hint_line}
 === SOURCE FILE CHANGES (staged) ===
 {source_diff}
 
@@ -348,6 +371,7 @@ def build_prompt(
     output_content: str,
     instruction: str,
     no_change_token: str,
+    model_hint: str = "",
 ) -> str:
     """
     Constructs the full prompt string for the AI model by formatting the
@@ -360,13 +384,20 @@ def build_prompt(
         source_content: Content of the staged source file.
         output_content: Current content of the output file (pre-image).
         instruction: Specific instructions for the AI.
+        model_hint: Optional hint ('fast' or 'quality') for model selection.
 
     Returns:
         The formatted prompt string.
     """
+    # Format the model hint line if provided
+    model_hint_line = ""
+    if model_hint:
+        model_hint_line = f"TASK COMPLEXITY: {model_hint.upper()}\n"
+
     return PROMPT_TEMPLATE.format(
         source_path=source_rel.as_posix(),
         output_path=output_rel.as_posix(),
+        model_hint_line=model_hint_line,
         source_diff=source_diff.strip(),
         source_content=source_content.strip(),
         output_content=output_content.strip() or "(empty)",  # Indicate if output content is empty.
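A quick check of how the hint surfaces in the prompt. This is a standalone restatement of the formatting logic above; the hint values are the two documented ones:

```python
# With a hint, the template's {model_hint_line} slot gets one extra header line;
# with no hint it renders as an empty string (an empty line in the prompt).
for hint, expected in [("fast", "TASK COMPLEXITY: FAST\n"),
                       ("quality", "TASK COMPLEXITY: QUALITY\n"),
                       ("", "")]:
    line = f"TASK COMPLEXITY: {hint.upper()}\n" if hint else ""
    assert line == expected
```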
@@ -375,13 +406,14 @@ def build_prompt(
     )
 
 
-def call_model(model: ModelConfig, prompt: str, cwd: Path) -> tuple[str, bool]:
+def call_model(model: ModelConfig, prompt: str, model_hint: str, cwd: Path) -> tuple[str, bool]:
     """
     Invokes the AI model command with the given prompt and captures its output.
 
     Args:
         model: The ModelConfig object containing the AI command.
         prompt: The input prompt string for the AI model.
+        model_hint: Optional hint ('fast' or 'quality') for model selection.
         cwd: The current working directory for executing the command.
 
     Returns:
@@ -392,30 +424,28 @@ def call_model(model: ModelConfig, prompt: str, cwd: Path) -> tuple[str, bool]:
     """
     errors: list[str] = []
 
-    for command in model.commands:
-        result = subprocess.run(
-            command,
-            input=prompt,
-            text=True,
-            capture_output=True,
-            cwd=str(cwd),
-            shell=True,
-        )
-        raw_stdout = result.stdout or ""
-        stdout = raw_stdout.strip()
-        stderr = result.stderr.strip()
+    # Get commands based on hint
+    commands = model.get_commands_for_hint(model_hint)
+
+    for command in commands:
+        executor, raw_stdout, stderr, returncode = _run_ai_command(command, prompt, cwd)
 
-        if stdout:
-            if stdout == model.sentinel:
+        if raw_stdout:
+            stripped = raw_stdout.strip()
+            if stripped == model.sentinel:
                 return raw_stdout, True
             if "API Error:" in raw_stdout and "Overloaded" in raw_stdout:
                 raise PatchGenerationError("Claude API is overloaded (500 error) - please retry later")
-            return raw_stdout, False
+            if "<<<AI_DIFF_START>>>" in raw_stdout:
+                return raw_stdout, False
+            # Non-empty output without diff markers counts as failure so we can try fallbacks.
+            errors.append(f"{executor!r} produced non-diff output: {stripped[:80]}")
+            continue
 
-        if result.returncode == 0:
-            errors.append(f"{command!r} produced no output")
+        if returncode == 0:
+            errors.append(f"{executor!r} produced no output")
         else:
-            errors.append(f"{command!r} exited with {result.returncode}: {stderr or 'no stderr'}")
+            errors.append(f"{executor!r} exited with {returncode}: {stderr or 'no stderr'}")
 
     raise PatchGenerationError("AI command(s) failed: " + "; ".join(errors))
 
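To make the new fallback semantics concrete, here is a self-contained sketch of how one command's output is classified before moving on to the next command in the chain. The sentinel value and sample strings are illustrative; the branch order mirrors the loop above:

```python
SENTINEL = "NO_CHANGES"  # placeholder; the real sentinel comes from config


def classify(stdout: str, returncode: int) -> str:
    stripped = stdout.strip()
    if stdout:
        if stripped == SENTINEL:
            return "no-changes"            # return (stdout, True), stop
        if "API Error:" in stdout and "Overloaded" in stdout:
            return "raise-overloaded"      # hard failure, no fallback
        if "<<<AI_DIFF_START>>>" in stdout:
            return "accept-diff"           # return (stdout, False), stop
        return "fallback: non-diff output" # record error, try next command
    if returncode == 0:
        return "fallback: no output"
    return "fallback: command failed"


assert classify("NO_CHANGES", 0) == "no-changes"
assert classify("<<<AI_DIFF_START>>>\n--- a/f\n+++ b/f", 0) == "accept-diff"
assert classify("Sure! Here is an explanation...", 0) == "fallback: non-diff output"
assert classify("", 1) == "fallback: command failed"
```

The notable change is the third branch: previously any non-empty, non-sentinel output was returned as a patch; now chatty non-diff output is logged and the next command in the chain gets a chance.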
@@ -621,3 +651,70 @@ def run(args: list[str], cwd: Path, check: bool = True) -> subprocess.CompletedProcess:
     if check and result.returncode != 0:
         raise PatchGenerationError(f"Command {' '.join(args)} failed: {result.stderr.strip()}")
     return result
+def _run_ai_command(command: str, prompt: str, cwd: Path) -> tuple[str, str, str, int]:
+    """Run an AI command and return (executed_command, stdout, stderr, returncode)."""
+    if command.strip().startswith("codex"):
+        return _run_codex_command(command, prompt, cwd)
+
+    result = subprocess.run(
+        command,
+        input=prompt,
+        text=True,
+        capture_output=True,
+        cwd=str(cwd),
+        shell=True,
+    )
+    raw_stdout = result.stdout or ""
+    stderr = result.stderr.strip()
+    return command, raw_stdout, stderr, result.returncode
+
+
+def _run_codex_command(command: str, prompt: str, cwd: Path) -> tuple[str, str, str, int]:
+    """Execute codex CLI with JSON output and extract last message text."""
+    json_command = _ensure_codex_json(command)
+    result = subprocess.run(
+        json_command,
+        input=prompt,
+        text=True,
+        capture_output=True,
+        cwd=str(cwd),
+        shell=True,
+    )
+
+    stdout_text = result.stdout or ""
+    last_message = _extract_codex_last_message(stdout_text)
+    return json_command, last_message, result.stderr.strip(), result.returncode
+
+
+def _ensure_codex_json(command: str) -> str:
+    """Ensure codex command runs via `codex exec --json --color=never` for machine parsing."""
+    parts = command.strip().split()
+    if len(parts) >= 2 and parts[1] == "exec":
+        base = command
+    else:
+        base = command.replace("codex", "codex exec", 1)
+
+    if "--json" not in base:
+        base = f"{base} --json"
+    if "--color" not in base:
+        base = f"{base} --color=never"
+    return base
+
+
+def _extract_codex_last_message(stdout_text: str) -> str:
+    """Parse codex JSONL output and return the final agent message text."""
+    last_text = ""
+    for line in stdout_text.splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            payload = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        item = payload.get("item")
+        if isinstance(item, dict) and item.get("type") == "agent_message":
+            text = item.get("text")
+            if isinstance(text, str):
+                last_text = text
+    return last_text
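The JSONL event shape consumed here is inferred from the parser itself, not from codex documentation: only lines of the form `{"item": {"type": "agent_message", "text": ...}}` contribute, and the last one wins. A self-contained walk-through of that logic with invented sample events:

```python
import json

sample = "\n".join([
    '{"item": {"type": "reasoning", "text": "thinking..."}}',   # skipped: wrong type
    "not json at all",                                          # skipped: JSONDecodeError
    '{"item": {"type": "agent_message", "text": "draft answer"}}',
    '{"item": {"type": "agent_message", "text": "<<<AI_DIFF_START>>>\\n--- a/f\\n+++ b/f"}}',
])

last_text = ""
for line in sample.splitlines():
    line = line.strip()
    if not line:
        continue
    try:
        payload = json.loads(line)
    except json.JSONDecodeError:
        continue  # non-JSON noise (e.g. stray log lines) is skipped
    item = payload.get("item")
    if isinstance(item, dict) and item.get("type") == "agent_message":
        text = item.get("text")
        if isinstance(text, str):
            last_text = text  # keep overwriting: the final agent message wins

assert last_text.startswith("<<<AI_DIFF_START>>>")

# Expected command rewrites from _ensure_codex_json (per the logic above):
#   "codex"              -> "codex exec --json --color=never"
#   "codex exec --json"  -> "codex exec --json --color=never"
#   "codex --model foo"  -> "codex exec --model foo --json --color=never"
```

The remaining two hunks are from the test suite: they reflow the single-line `ModelConfig(...)` construction into a multi-line call.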
@@ -40,7 +40,10 @@ diff --git a/Docs/features/FR_1/discussions/example.discussion.sum.md b/Docs/fea
     patch_file = tmp_path / "patch.txt"
     patch_file.write_text(patch_text, encoding="utf-8")
 
-    model = ModelConfig(commands=[f"bash -lc 'cat {patch_file.as_posix()}'"], sentinel=DEFAULT_SENTINEL)
+    model = ModelConfig(
+        commands=[f"bash -lc 'cat {patch_file.as_posix()}'"],
+        sentinel=DEFAULT_SENTINEL,
+    )
     rules = RulesConfig(root=temp_repo, global_rules={"file_associations": {}, "rules": {}})
 
     generate_output(
@@ -58,7 +58,10 @@ diff --git a/Docs/features/FR_1/discussions/example.discussion.sum.md b/Docs/fea
     patch_file.write_text(patch_text, encoding="utf-8")
 
     rules = RulesConfig.load(repo)
-    model = ModelConfig(commands=[f"bash -lc 'cat {patch_file.as_posix()}'"], sentinel=DEFAULT_SENTINEL)
+    model = ModelConfig(
+        commands=[f"bash -lc 'cat {patch_file.as_posix()}'"],
+        sentinel=DEFAULT_SENTINEL,
+    )
 
     rc = process(repo, rules, model)
     assert rc == 0