From 60957ae4af1dc8410a2c295883ec7ea8b1b194dc Mon Sep 17 00:00:00 2001
From: rob <robdickson444@hotmail.com>
Date: Sat, 30 May 2026 15:42:33 -0300
Subject: [PATCH] Carry conversation history so "yes" / "do that" resolve

The driver interpreted each utterance in isolation (schemas + scene +
utterance only), so when WoodShop asked a clarifying question and the user
replied "yes", the next turn had no record of what was proposed and fell
back to "not sure what you'd like me to do".

- driver.interpret/handle now accept a rolling (utterance, reply) history;
  SYSTEM prompt gains a "Recent conversation" section instructing the model
  to execute the previously-proposed calls on affirmation.
- CLI main() keeps a history list across the loop (not trimmed there; only the
  last _MAX_HISTORY turns are rendered into the prompt).
- GUI Controller keeps a bounded self._history and threads it through
  run_command, appending each turn.
- tests: history render/window, prompt inclusion, handle + controller append.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/woodshop/driver.py         | 35 +++++++++++++++++++++++++++-----
 src/woodshop/gui/controller.py |  9 +++++++--
 tests/test_driver.py           | 37 ++++++++++++++++++++++++++++++++++
 tests/test_gui_controller.py   | 18 +++++++++++++++++
 4 files changed, 92 insertions(+), 7 deletions(-)
diff --git a/src/woodshop/driver.py b/src/woodshop/driver.py
index 53d0bce..16bd352 100644
--- a/src/woodshop/driver.py
+++ b/src/woodshop/driver.py
@@ -24,6 +24,7 @@ import sys
 
 TOOL_FILTER = "wood-*"  # auto-discover every wood-* tool, no hardcoded list
 REASON_PROVIDER = "claude -p"  # chosen for reliable structured tool-calling
+_MAX_HISTORY = 6  # turns of recent conversation fed back for reference-resolution
 
 # A board placed earlier in the SAME utterance is referenced as $1, $2, ...
 _SYMBOL = re.compile(r"\$(\d+)")
@@ -53,6 +54,13 @@ Tools (JSON schemas):
 Current scene:
 {scene}
 
+Recent conversation (oldest first) — use it to resolve back-references like "yes",
+"do that", "go ahead", or "the one you suggested". If your PREVIOUS turn proposed a
+specific set of changes and the user now affirms ("yes" / "do it" / "go ahead"),
+emit the full sequence of tool calls you proposed (read off the current scene for
+real ids). Only ask again if the affirmation is genuinely ambiguous.
+{history}
+
 Rules:
 - Respond with ONLY a JSON array. No prose, no markdown fences.
 - Each element is {{"tool": "<name>", "args": {{...}}}}.
@@ -118,9 +126,21 @@ def _extract_calls(raw: str) -> list[dict] | None:
     return None
 
 
-def interpret(utterance: str, schemas: str, scene_text: str | None = None) -> list[dict]:
+def _render_history(history: list[tuple[str, str]] | None) -> str:
+    """Render the newest _MAX_HISTORY (utterance, reply) turns, oldest first."""
+    if not history:  # None and [] both mean there is nothing to show
+        return "(no prior turns)"
+    window = history[-_MAX_HISTORY:]
+    return "\n".join(
+        text for said, reply in window
+        for text in (f'User: "{said}"', f"WoodShop: {reply}"))
+
+
+def interpret(utterance: str, schemas: str, scene_text: str | None = None,
+              history: list[tuple[str, str]] | None = None) -> list[dict]:
     scene = scene_text if scene_text is not None else scene_summary()
-    prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance)
+    prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance,
+                           history=_render_history(history))
     raw = _run(REASON_PROVIDER.split(), stdin=prompt)
     calls = _extract_calls(raw)
     if calls is None:
@@ -215,12 +235,15 @@ def summarize(calls: list[dict], messages: list[str]) -> str:
     return ("Done — " + ", ".join(chunks) + ".") if chunks else "Done."
 
 
-def handle(utterance: str, schemas: str, voice: bool, verbose: bool) -> None:
-    calls = interpret(utterance, schemas)
+def handle(utterance: str, schemas: str, voice: bool, verbose: bool,
+           history: list[tuple[str, str]] | None = None) -> None:
+    calls = interpret(utterance, schemas, history=history)
     messages = dispatch(calls, verbose=verbose)
     full = " ".join(m for m in messages if m).strip()
     spoken = summarize(calls, messages)
     print(f"WoodShop: {full or spoken}")
+    if history is not None:  # history is optional; when given, the caller's list is mutated in place
+        history.append((utterance, spoken))  # records the spoken summary even when `full` was printed
     if voice:
         speak(spoken)
 
@@ -256,6 +279,7 @@ def main(argv: list[str] | None = None) -> int:
         return 0
 
     print("WoodShop ready. Say things like 'place a 6 foot 2x4'. Ctrl-C to quit.")
+    history: list[tuple[str, str]] = []
     while True:
         utterance = get_utterance(args.voice, args.duration)
         if utterance is None:
@@ -264,7 +288,8 @@ def main(argv: list[str] | None = None) -> int:
         if utterance.lower() in ("quit", "exit", "stop", "done"):
             return 0
         try:
-            handle(utterance, schemas, voice=args.voice, verbose=not args.quiet)
+            handle(utterance, schemas, voice=args.voice, verbose=not args.quiet,
+                   history=history)
         except Exception as exc:  # never let one bad command kill the session
             print(f"WoodShop: sorry, that command failed ({exc}).")
 
diff --git a/src/woodshop/gui/controller.py b/src/woodshop/gui/controller.py
index b10b1aa..b789a73 100644
--- a/src/woodshop/gui/controller.py
+++ b/src/woodshop/gui/controller.py
@@ -80,6 +80,7 @@ class Controller(QObject):
         self.scene_path = Path(scene_path) if scene_path else default_scene_path()
         self.scene = Scene.load(self.scene_path)
         self._schemas: str | None = None
+        self._history: list[tuple[str, str]] = []   # recent (utterance, reply) turns
         self.selected: list[str] = [self.scene.selection] if self.scene.selection else []
         self.active_feature: str | None = None   # feature currently being edited
         self.preview = None                      # (Part, Feature) shown as an overlay, or None
@@ -425,7 +426,11 @@ class Controller(QObject):
         scene_text = (cli.cmd_status(self.scene, None)
                       + f"\nCurrently selected ('these' / 'them' / 'the selected'): {sel}"
                       + "\n" + spatial_summary(self.scene))
-        calls = driver.interpret(text, self.schemas(), scene_text=scene_text)
+        calls = driver.interpret(text, self.schemas(), scene_text=scene_text,
+                                 history=self._history)
         messages = driver.dispatch(calls, verbose=False, executor=self.execute_call)
         self._commit()
-        return driver.summarize(calls, messages)
+        spoken = driver.summarize(calls, messages)
+        self._history.append((text, spoken))
+        del self._history[:-driver._MAX_HISTORY]   # keep a bounded window
+        return spoken
diff --git a/tests/test_driver.py b/tests/test_driver.py
index 00391cb..5adb766 100644
--- a/tests/test_driver.py
+++ b/tests/test_driver.py
@@ -98,3 +98,40 @@ def test_extract_calls_strips_fences_and_handles_object():
 
 def test_extract_calls_returns_none_on_garbage():
     assert driver._extract_calls("no json here") is None
+
+
+def test_render_history_empty_and_populated():
+    """No turns yields the placeholder; a turn renders as a User/WoodShop pair."""
+    for nothing in (None, []):
+        assert driver._render_history(nothing) == "(no prior turns)"
+    rendered = driver._render_history([("build a table", "Done — placed 9.")])
+    assert 'User: "build a table"' in rendered and "WoodShop: Done — placed 9." in rendered
+
+
+def test_render_history_windowed():
+    """Only the newest _MAX_HISTORY turns render, for any positive window size."""
+    turns = [(f"u{i}", f"a{i}") for i in range(driver._MAX_HISTORY + 4)]
+    text = driver._render_history(turns)   # u0..u3 are the four that must be dropped
+    assert '"u4"' in text and f'"u{len(turns) - 1}"' in text and '"u3"' not in text
+
+
+def test_interpret_includes_history_in_prompt(monkeypatch):
+    """The prompt sent to the reasoning provider contains the rendered prior turn."""
+    prompts = []
+
+    def fake_run(cmd, stdin=""):
+        prompts.append(stdin)   # capture what interpret() pipes to the provider
+        return "[]"
+
+    monkeypatch.setattr(driver, "_run", fake_run)
+    prior = [("add tenons?", "Want me to put a tenon on each end?")]
+    driver.interpret("yes", schemas="[]", scene_text="empty", history=prior)
+    assert all(s in prompts[-1] for s in ('User: "add tenons?"', prior[0][1]))
+
+
+def test_handle_appends_to_history(monkeypatch):
+    say_hi = '[{"tool": "say", "args": {"text": "hi there"}}]'   # canned provider output
+    monkeypatch.setattr(driver, "_run", lambda cmd, stdin="": say_hi)
+    turns: list[tuple[str, str]] = []
+    driver.handle("hello", schemas="[]", voice=False, verbose=False, history=turns)
+    assert turns == [("hello", "hi there")]   # handle() appended (utterance, spoken)
diff --git a/tests/test_gui_controller.py b/tests/test_gui_controller.py
index 3252c42..093241a 100644
--- a/tests/test_gui_controller.py
+++ b/tests/test_gui_controller.py
@@ -138,3 +138,21 @@ def test_break_feature_connection(tmp_path):
 def test_unknown_tool_is_safe(tmp_path):
     c = _controller(tmp_path)
     assert "unknown" in c.execute_call("wood-bogus", {}).lower()
+
+
+def test_run_command_threads_history(tmp_path, monkeypatch):
+    """Each run_command call hands interpret the earlier turns, then logs its own."""
+    c = _controller(tmp_path)
+    snapshots = []   # copy of the history interpret received on each call
+
+    def fake_interpret(text, schemas, scene_text=None, history=None):
+        snapshots.append(list(history or []))
+        return [{"tool": "say", "args": {"text": "want me to add tenons?"}}]
+
+    monkeypatch.setattr(driver, "interpret", fake_interpret)
+    first_turn = ("build a table", "want me to add tenons?")
+    c.run_command(first_turn[0])
+    assert snapshots[-1] == []                         # first turn: nothing prior
+    assert c._history == [first_turn]
+    c.run_command("yes")
+    assert snapshots[-1] == [first_turn]