From 60957ae4af1dc8410a2c295883ec7ea8b1b194dc Mon Sep 17 00:00:00 2001 From: rob Date: Sat, 30 May 2026 15:42:33 -0300 Subject: [PATCH] Carry conversation history so "yes" / "do that" resolve The driver interpreted each utterance in isolation (schemas + scene + utterance only), so when WoodShop asked a clarifying question and the user replied "yes", the next turn had no record of what was proposed and fell back to "not sure what you'd like me to do". - driver.interpret/handle now accept a rolling (utterance, reply) history; SYSTEM prompt gains a "Recent conversation" section instructing the model to execute the previously-proposed calls on affirmation. - CLI main() keeps a history list across the loop. - GUI Controller keeps a bounded self._history and threads it through run_command, appending each turn. - tests: history render/window, prompt inclusion, handle + controller append. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/woodshop/driver.py | 35 +++++++++++++++++++++++++++----- src/woodshop/gui/controller.py | 9 +++++++-- tests/test_driver.py | 37 ++++++++++++++++++++++++++++++++++ tests/test_gui_controller.py | 18 +++++++++++++++++ 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/src/woodshop/driver.py b/src/woodshop/driver.py index 53d0bce..16bd352 100644 --- a/src/woodshop/driver.py +++ b/src/woodshop/driver.py @@ -24,6 +24,7 @@ import sys TOOL_FILTER = "wood-*" # auto-discover every wood-* tool, no hardcoded list REASON_PROVIDER = "claude -p" # chosen for reliable structured tool-calling +_MAX_HISTORY = 6 # turns of recent conversation fed back for reference-resolution # A board placed earlier in the SAME utterance is referenced as $1, $2, ... _SYMBOL = re.compile(r"\$(\d+)") @@ -53,6 +54,13 @@ Tools (JSON schemas): Current scene: {scene} +Recent conversation (oldest first) — use it to resolve back-references like "yes", +"do that", "go ahead", or "the one you suggested". If your PREVIOUS turn proposed a +specific set of changes and the user now affirms ("yes" / "do it" / "go ahead"), +emit the full sequence of tool calls you proposed (read off the current scene for +real ids). Only ask again if the affirmation is genuinely ambiguous. +{history} + Rules: - Respond with ONLY a JSON array. No prose, no markdown fences. - Each element is {{"tool": "", "args": {{...}}}}. @@ -118,9 +126,21 @@ def _extract_calls(raw: str) -> list[dict] | None: return None -def interpret(utterance: str, schemas: str, scene_text: str | None = None) -> list[dict]: +def _render_history(history: list[tuple[str, str]] | None) -> str: + if not history: + return "(no prior turns)" + lines = [] + for user, assistant in history[-_MAX_HISTORY:]: + lines.append(f'User: "{user}"') + lines.append(f"WoodShop: {assistant}") + return "\n".join(lines) + + +def interpret(utterance: str, schemas: str, scene_text: str | None = None, + history: list[tuple[str, str]] | None = None) -> list[dict]: scene = scene_text if scene_text is not None else scene_summary() - prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance) + prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance, + history=_render_history(history)) raw = _run(REASON_PROVIDER.split(), stdin=prompt) calls = _extract_calls(raw) if calls is None: @@ -215,12 +235,15 @@ def summarize(calls: list[dict], messages: list[str]) -> str: return ("Done — " + ", ".join(chunks) + ".") if chunks else "Done." -def handle(utterance: str, schemas: str, voice: bool, verbose: bool) -> None: - calls = interpret(utterance, schemas) +def handle(utterance: str, schemas: str, voice: bool, verbose: bool, + history: list[tuple[str, str]] | None = None) -> None: + calls = interpret(utterance, schemas, history=history) messages = dispatch(calls, verbose=verbose) full = " ".join(m for m in messages if m).strip() spoken = summarize(calls, messages) print(f"WoodShop: {full or spoken}") + if history is not None: + history.append((utterance, spoken)) if voice: speak(spoken) @@ -256,6 +279,7 @@ def main(argv: list[str] | None = None) -> int: return 0 print("WoodShop ready. Say things like 'place a 6 foot 2x4'. Ctrl-C to quit.") + history: list[tuple[str, str]] = [] while True: utterance = get_utterance(args.voice, args.duration) if utterance is None: @@ -264,7 +288,8 @@ def main(argv: list[str] | None = None) -> int: if utterance.lower() in ("quit", "exit", "stop", "done"): return 0 try: - handle(utterance, schemas, voice=args.voice, verbose=not args.quiet) + handle(utterance, schemas, voice=args.voice, verbose=not args.quiet, + history=history) except Exception as exc: # never let one bad command kill the session print(f"WoodShop: sorry, that command failed ({exc}).") diff --git a/src/woodshop/gui/controller.py b/src/woodshop/gui/controller.py index b10b1aa..b789a73 100644 --- a/src/woodshop/gui/controller.py +++ b/src/woodshop/gui/controller.py @@ -80,6 +80,7 @@ class Controller(QObject): self.scene_path = Path(scene_path) if scene_path else default_scene_path() self.scene = Scene.load(self.scene_path) self._schemas: str | None = None + self._history: list[tuple[str, str]] = [] # recent (utterance, reply) turns self.selected: list[str] = [self.scene.selection] if self.scene.selection else [] self.active_feature: str | None = None # feature currently being edited self.preview = None # (Part, Feature) shown as an overlay, or None @@ -425,7 +426,11 @@ class Controller(QObject): scene_text = (cli.cmd_status(self.scene, None) + f"\nCurrently selected ('these' / 'them' / 'the selected'): {sel}" + "\n" + spatial_summary(self.scene)) - calls = driver.interpret(text, self.schemas(), scene_text=scene_text) + calls = driver.interpret(text, self.schemas(), scene_text=scene_text, + history=self._history) messages = driver.dispatch(calls, verbose=False, executor=self.execute_call) self._commit() - return driver.summarize(calls, messages) + spoken = driver.summarize(calls, messages) + self._history.append((text, spoken)) + del self._history[:-driver._MAX_HISTORY] # keep a bounded window + return spoken diff --git a/tests/test_driver.py b/tests/test_driver.py index 00391cb..5adb766 100644 --- a/tests/test_driver.py +++ b/tests/test_driver.py @@ -98,3 +98,40 @@ def test_extract_calls_strips_fences_and_handles_object(): def test_extract_calls_returns_none_on_garbage(): assert driver._extract_calls("no json here") is None + + +def test_render_history_empty_and_populated(): + assert driver._render_history(None) == "(no prior turns)" + assert driver._render_history([]) == "(no prior turns)" + text = driver._render_history([("build a table", "Done — placed 9.")]) + assert 'User: "build a table"' in text + assert "WoodShop: Done — placed 9." in text + + +def test_render_history_windowed(): + turns = [(f"u{i}", f"a{i}") for i in range(10)] + text = driver._render_history(turns) + assert "u9" in text and "u4" in text # last _MAX_HISTORY kept + assert "u3" not in text # older dropped + + +def test_interpret_includes_history_in_prompt(monkeypatch): + captured = {} + + def fake_run(cmd, stdin=""): + captured["prompt"] = stdin + return "[]" + + monkeypatch.setattr(driver, "_run", fake_run) + driver.interpret("yes", schemas="[]", scene_text="empty", + history=[("add tenons?", "Want me to put a tenon on each end?")]) + assert "Want me to put a tenon on each end?" in captured["prompt"] + assert 'User: "add tenons?"' in captured["prompt"] + + +def test_handle_appends_to_history(monkeypatch): + monkeypatch.setattr(driver, "_run", + lambda cmd, stdin="": '[{"tool": "say", "args": {"text": "hi there"}}]') + history = [] + driver.handle("hello", schemas="[]", voice=False, verbose=False, history=history) + assert history == [("hello", "hi there")] diff --git a/tests/test_gui_controller.py b/tests/test_gui_controller.py index 3252c42..093241a 100644 --- a/tests/test_gui_controller.py +++ b/tests/test_gui_controller.py @@ -138,3 +138,21 @@ def test_break_feature_connection(tmp_path): def test_unknown_tool_is_safe(tmp_path): c = _controller(tmp_path) assert "unknown" in c.execute_call("wood-bogus", {}).lower() + + +def test_run_command_threads_history(tmp_path, monkeypatch): + """run_command feeds prior turns to interpret and records the new turn.""" + c = _controller(tmp_path) + seen = {} + + def fake_interpret(text, schemas, scene_text=None, history=None): + seen["history"] = list(history or []) + return [{"tool": "say", "args": {"text": "want me to add tenons?"}}] + + monkeypatch.setattr(driver, "interpret", fake_interpret) + c.run_command("build a table") + assert seen["history"] == [] # first turn: nothing prior + assert c._history == [("build a table", "want me to add tenons?")] + + c.run_command("yes") + assert seen["history"] == [("build a table", "want me to add tenons?")]