Add reference-photo input: "build something like this"
Attach a photo (📎 button, drag-drop, paste, or an image URL) and the driver hands it to claude -p, which reads the image (its Read tool sees images) and emits the usual tool-call JSON to build a simplified, buildable interpretation in dimensional lumber — no API keys, same claude -p pipe.

- driver: interpret(image_path=) prepends a reference-photo directive with the image's absolute path; find_image_url() + fetch_image() download a linked image to a temp file; woodshop-talk --image (path or URL) for CLI/voice.
- controller.run_command(image_path=) passthrough.
- command bar: 📎 attach (file picker), drag-drop image, Ctrl+V paste image, and image-URL-in-text detection; downloads run off the UI thread; an image chip shows/clears the attachment.
- tests: URL detection, image directive in prompt, fetch_image temp write, controller passthrough, command-bar attach + default-text smoke. 216 pass.

Honest limit: the live image round-trip needs a real display/model call to verify — it is wired and unit-tested; please confirm it sees the photo on your machine.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b9b0871ac3
commit
c623ad2576
|
|
@ -8,6 +8,11 @@ Talk to it like the Star Trek holodeck and watch furniture build itself:
|
||||||
|
|
||||||
> *"Build a coffee table: a four foot by two foot frame from 2x4s, with four legs 18 inches tall standing at the corners."*
|
> *"Build a coffee table: a four foot by two foot frame from 2x4s, with four legs 18 inches tall standing at the corners."*
|
||||||
|
|
||||||
|
You can also **attach a reference photo** (📎, drag-drop, paste, or an image URL)
|
||||||
|
and say *"build something like this"* — WoodShop hands the image to the model and
|
||||||
|
builds a simplified, buildable interpretation in dimensional lumber that you then
|
||||||
|
refine by voice/text. (It's an interpretation, not a measured replica.)
|
||||||
|
|
||||||
Each board is real dimensional lumber (a 2x4 is modeled at its true 1.5″ × 3.5″),
|
Each board is real dimensional lumber (a 2x4 is modeled at its true 1.5″ × 3.5″),
|
||||||
so the result is buildable — export to **STEP** (CAD/CNC) or **STL** (3D print),
|
so the result is buildable — export to **STEP** (CAD/CNC) or **STL** (3D print),
|
||||||
and get a **cut list with board-feet and a shopping estimate**.
|
and get a **cut list with board-feet and a shopping estimate**.
|
||||||
|
|
|
||||||
|
|
@ -17,15 +17,53 @@ from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
TOOL_FILTER = "wood-*" # auto-discover every wood-* tool, no hardcoded list
|
TOOL_FILTER = "wood-*" # auto-discover every wood-* tool, no hardcoded list
|
||||||
REASON_PROVIDER = "claude -p" # chosen for reliable structured tool-calling
|
REASON_PROVIDER = "claude -p" # chosen for reliable structured tool-calling
|
||||||
_MAX_HISTORY = 6 # turns of recent conversation fed back for reference-resolution
|
_MAX_HISTORY = 6 # turns of recent conversation fed back for reference-resolution
|
||||||
|
|
||||||
|
# A reference photo can be attached to "build something like this". claude -p
# reads the image file (via its Read tool), so we just hand it an absolute path.
#
# The URL pattern insists on a "/" after the host. Without it, a hostname label
# that merely looks like an extension (e.g. "https://cdn.png.io/page") would be
# reported as the image link "https://cdn.png".
_IMG_URL = re.compile(
    r'https?://[^\s/?#]+/\S*?\.(?:png|jpe?g|webp|gif|bmp)\b', re.I)
_IMAGE_DIRECTIVE = (
    "A REFERENCE PHOTO of furniture is saved at this path:\n {path}\n"
    "Open and look at that image file. The user wants to build something LIKE it "
    "from dimensional lumber and plywood. Infer the major parts, rough proportions, "
    "and joinery, and emit the tool calls to build a SIMPLIFIED, buildable version "
    "with reasonable real dimensions in inches. This is an interpretation, not an "
    "exact replica — prefer standard stock sizes and right angles.\n\n"
)


def find_image_url(text: str) -> str | None:
    """Return the first http(s) image link found in *text*, or None.

    A link counts as an image when a png/jpg/jpeg/webp/gif/bmp extension
    (case-insensitive) appears after the host part of the URL. The match ends
    at the extension, so trailing punctuation is not included. ``None`` or an
    empty *text* yields ``None``.
    """
    m = _IMG_URL.search(text or "")
    return m.group(0) if m else None
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_image(url: str, timeout: int = 20) -> str:
    """Download an image URL to a temp file and return the file's path.

    Only ``http``/``https`` URLs are fetched; ``urlopen`` would otherwise also
    follow ``file://`` or ``ftp://`` links. The temp file's suffix comes from
    the response Content-Type, falling back to an extension found in the URL,
    and finally to ``.img``. The caller owns the returned file.

    Raises:
        ValueError: *url* is not an http(s) URL.
        OSError: the download or the temp-file write failed (this includes
            ``urllib.error.URLError``). The caller decides how to surface it.
    """
    # Local import so the block does not depend on a new file-level import.
    from urllib.parse import urlsplit

    if urlsplit(url).scheme.lower() not in ("http", "https"):
        raise ValueError(f"unsupported image URL (expected http/https): {url!r}")

    req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        ctype = (resp.headers.get("Content-Type") or "").split(";")[0].strip().lower()
        data = resp.read()

    ext = {"image/png": ".png", "image/jpeg": ".jpg", "image/webp": ".webp",
           "image/gif": ".gif", "image/bmp": ".bmp"}.get(ctype)
    if ext is None:
        # Unknown/missing Content-Type: trust an extension in the URL if any.
        m = re.search(r'\.(png|jpe?g|webp|gif|bmp)\b', url, re.I)
        ext = "." + m.group(1).lower() if m else ".img"

    fd, path = tempfile.mkstemp(suffix=ext, prefix="woodshop-ref-")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(data)
    except OSError:
        # Don't leave a partial/empty temp file behind when the write fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
|
||||||
|
|
||||||
# A board placed earlier in the SAME utterance is referenced as $1, $2, ...
|
# A board placed earlier in the SAME utterance is referenced as $1, $2, ...
|
||||||
_SYMBOL = re.compile(r"\$(\d+)")
|
_SYMBOL = re.compile(r"\$(\d+)")
|
||||||
|
|
||||||
|
|
@ -143,10 +181,13 @@ def _render_history(history: list[tuple[str, str]] | None) -> str:
|
||||||
|
|
||||||
|
|
||||||
def interpret(utterance: str, schemas: str, scene_text: str | None = None,
|
def interpret(utterance: str, schemas: str, scene_text: str | None = None,
|
||||||
history: list[tuple[str, str]] | None = None) -> list[dict]:
|
history: list[tuple[str, str]] | None = None,
|
||||||
|
image_path: str | None = None) -> list[dict]:
|
||||||
scene = scene_text if scene_text is not None else scene_summary()
|
scene = scene_text if scene_text is not None else scene_summary()
|
||||||
prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance,
|
prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance,
|
||||||
history=_render_history(history))
|
history=_render_history(history))
|
||||||
|
if image_path:
|
||||||
|
prompt = _IMAGE_DIRECTIVE.format(path=os.path.abspath(image_path)) + prompt
|
||||||
raw = _run(REASON_PROVIDER.split(), stdin=prompt)
|
raw = _run(REASON_PROVIDER.split(), stdin=prompt)
|
||||||
calls = _extract_calls(raw)
|
calls = _extract_calls(raw)
|
||||||
if calls is None:
|
if calls is None:
|
||||||
|
|
@ -242,8 +283,9 @@ def summarize(calls: list[dict], messages: list[str]) -> str:
|
||||||
|
|
||||||
|
|
||||||
def handle(utterance: str, schemas: str, voice: bool, verbose: bool,
|
def handle(utterance: str, schemas: str, voice: bool, verbose: bool,
|
||||||
history: list[tuple[str, str]] | None = None) -> None:
|
history: list[tuple[str, str]] | None = None,
|
||||||
calls = interpret(utterance, schemas, history=history)
|
image_path: str | None = None) -> None:
|
||||||
|
calls = interpret(utterance, schemas, history=history, image_path=image_path)
|
||||||
messages = dispatch(calls, verbose=verbose)
|
messages = dispatch(calls, verbose=verbose)
|
||||||
full = " ".join(m for m in messages if m).strip()
|
full = " ".join(m for m in messages if m).strip()
|
||||||
spoken = summarize(calls, messages)
|
spoken = summarize(calls, messages)
|
||||||
|
|
@ -271,9 +313,14 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
ap.add_argument("--voice", action="store_true", help="Listen on the mic instead of typing")
|
ap.add_argument("--voice", action="store_true", help="Listen on the mic instead of typing")
|
||||||
ap.add_argument("--duration", type=int, default=6, help="Mic recording seconds (--voice)")
|
ap.add_argument("--duration", type=int, default=6, help="Mic recording seconds (--voice)")
|
||||||
ap.add_argument("--once", help="Run a single command (non-interactive) and exit")
|
ap.add_argument("--once", help="Run a single command (non-interactive) and exit")
|
||||||
|
ap.add_argument("--image", help="Reference photo (path or URL) for 'build something like this'")
|
||||||
ap.add_argument("--quiet", action="store_true", help="Don't print per-call detail")
|
ap.add_argument("--quiet", action="store_true", help="Don't print per-call detail")
|
||||||
args = ap.parse_args(argv)
|
args = ap.parse_args(argv)
|
||||||
|
|
||||||
|
image_path = None
|
||||||
|
if args.image:
|
||||||
|
image_path = fetch_image(args.image) if args.image.startswith("http") else args.image
|
||||||
|
|
||||||
schemas = load_schemas()
|
schemas = load_schemas()
|
||||||
if not schemas:
|
if not schemas:
|
||||||
print("Could not load wood-* tool schemas (is CmdForge/pa-load-tools available?)",
|
print("Could not load wood-* tool schemas (is CmdForge/pa-load-tools available?)",
|
||||||
|
|
@ -281,7 +328,8 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
if args.once is not None:
|
if args.once is not None:
|
||||||
handle(args.once, schemas, voice=args.voice, verbose=not args.quiet)
|
handle(args.once, schemas, voice=args.voice, verbose=not args.quiet,
|
||||||
|
image_path=image_path)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
print("WoodShop ready. Say things like 'place a 6 foot 2x4'. Ctrl-C to quit.")
|
print("WoodShop ready. Say things like 'place a 6 foot 2x4'. Ctrl-C to quit.")
|
||||||
|
|
@ -295,7 +343,8 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
return 0
|
return 0
|
||||||
try:
|
try:
|
||||||
handle(utterance, schemas, voice=args.voice, verbose=not args.quiet,
|
handle(utterance, schemas, voice=args.voice, verbose=not args.quiet,
|
||||||
history=history)
|
history=history, image_path=image_path)
|
||||||
|
image_path = None # the reference photo applies to the first turn only
|
||||||
except Exception as exc: # never let one bad command kill the session
|
except Exception as exc: # never let one bad command kill the session
|
||||||
print(f"WoodShop: sorry, that command failed ({exc}).")
|
print(f"WoodShop: sorry, that command failed ({exc}).")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,23 @@
|
||||||
"""Command bar: type a command or push-to-talk, see the transcript, optionally
|
"""Command bar: type a command or push-to-talk, see the transcript, optionally
|
||||||
hear the reply. Slow work (LLM, dictate, TTS) runs off the UI thread."""
|
hear the reply. You can also attach a reference photo ("build something like
|
||||||
|
this") by the 📎 button, drag-drop, paste, or a pasted image URL. Slow work
|
||||||
|
(LLM, dictate, TTS, image download) runs off the UI thread."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from PySide6.QtCore import Qt, QThreadPool
|
from PySide6.QtCore import Qt, QThreadPool
|
||||||
from PySide6.QtWidgets import (QCheckBox, QHBoxLayout, QLabel, QLineEdit,
|
from PySide6.QtGui import QKeySequence
|
||||||
QPushButton, QTextEdit, QVBoxLayout, QWidget)
|
from PySide6.QtWidgets import (QApplication, QCheckBox, QFileDialog, QHBoxLayout, QLabel,
|
||||||
|
QLineEdit, QPushButton, QTextEdit, QVBoxLayout, QWidget)
|
||||||
|
|
||||||
|
from .. import driver
|
||||||
from .controller import Controller
|
from .controller import Controller
|
||||||
from .workers import run_async
|
from .workers import run_async
|
||||||
|
|
||||||
_WHO_COLOR = {"you": "#9cdcfe", "ws": "#c8965a", "sys": "#e06c75"}
|
_WHO_COLOR = {"you": "#9cdcfe", "ws": "#c8965a", "sys": "#e06c75"}
|
||||||
|
_IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp")
|
||||||
|
|
||||||
|
|
||||||
class CommandBar(QWidget):
|
class CommandBar(QWidget):
|
||||||
|
|
@ -19,6 +25,8 @@ class CommandBar(QWidget):
|
||||||
super().__init__(parent)
|
super().__init__(parent)
|
||||||
self.c = controller
|
self.c = controller
|
||||||
self.pool = pool
|
self.pool = pool
|
||||||
|
self._pending_image: str | None = None # attached reference photo path
|
||||||
|
self.setAcceptDrops(True) # drop an image onto the bar
|
||||||
|
|
||||||
root = QVBoxLayout(self)
|
root = QVBoxLayout(self)
|
||||||
self.transcript = QTextEdit(readOnly=True)
|
self.transcript = QTextEdit(readOnly=True)
|
||||||
|
|
@ -32,6 +40,12 @@ class CommandBar(QWidget):
|
||||||
self.mic.clicked.connect(self._listen)
|
self.mic.clicked.connect(self._listen)
|
||||||
row.addWidget(self.mic)
|
row.addWidget(self.mic)
|
||||||
|
|
||||||
|
self.attach = QPushButton("📎")
|
||||||
|
self.attach.setToolTip("Attach a reference photo — then say 'build something like this'")
|
||||||
|
self.attach.setFixedWidth(40)
|
||||||
|
self.attach.clicked.connect(self._attach_image)
|
||||||
|
row.addWidget(self.attach)
|
||||||
|
|
||||||
self.input = QLineEdit()
|
self.input = QLineEdit()
|
||||||
self.input.setPlaceholderText("Type a command, e.g. 'build a coffee table' — Enter to send")
|
self.input.setPlaceholderText("Type a command, e.g. 'build a coffee table' — Enter to send")
|
||||||
self.input.returnPressed.connect(self._send)
|
self.input.returnPressed.connect(self._send)
|
||||||
|
|
@ -45,6 +59,9 @@ class CommandBar(QWidget):
|
||||||
bottom = QHBoxLayout()
|
bottom = QHBoxLayout()
|
||||||
self.speak = QCheckBox("Speak replies")
|
self.speak = QCheckBox("Speak replies")
|
||||||
bottom.addWidget(self.speak)
|
bottom.addWidget(self.speak)
|
||||||
|
self.image_chip = QLabel("") # shows the attached photo name
|
||||||
|
self.image_chip.setStyleSheet("color:#c8965a")
|
||||||
|
bottom.addWidget(self.image_chip)
|
||||||
bottom.addStretch()
|
bottom.addStretch()
|
||||||
self.status = QLabel("")
|
self.status = QLabel("")
|
||||||
bottom.addWidget(self.status)
|
bottom.addWidget(self.status)
|
||||||
|
|
@ -52,6 +69,69 @@ class CommandBar(QWidget):
|
||||||
|
|
||||||
self.c.logged.connect(self._log)
|
self.c.logged.connect(self._log)
|
||||||
|
|
||||||
|
# ----- reference image ---------------------------------------------
|
||||||
|
def _set_image(self, path: str | None) -> None:
    """Remember *path* as the pending reference photo and refresh the chip.

    A truthy path shows its file name in the chip with a removal hint; a falsy
    one blanks both the chip text and its tooltip.
    """
    self._pending_image = path
    if not path:
        self.image_chip.setText("")
        self.image_chip.setToolTip("")
        return
    self.image_chip.setText(f"📎 {os.path.basename(path)} ✕")
    self.image_chip.setToolTip("Click to remove the attached photo")
|
||||||
|
|
||||||
|
def mousePressEvent(self, e):
    """Drop the attachment when the press lands on the image chip, then
    pass the event on to the base class as usual."""
    if self._pending_image:
        # Only hit-test the chip while something is actually attached.
        if self.image_chip.geometry().contains(e.pos()):
            self._set_image(None)
    super().mousePressEvent(e)
|
||||||
|
|
||||||
|
def _attach_image(self) -> None:
    """Ask for an image file and attach it as the reference photo.

    Cancelling the dialog changes nothing. When a file is chosen and the
    input box is blank, the box is pre-filled with the default request.
    """
    chosen, _selected_filter = QFileDialog.getOpenFileName(
        self, "Attach reference photo", "",
        "Images (*.png *.jpg *.jpeg *.webp *.gif *.bmp)")
    if not chosen:
        return
    self._set_image(chosen)
    if not self.input.text().strip():
        self.input.setText("build something like this")
|
||||||
|
|
||||||
|
def dragEnterEvent(self, e):
    """Accept the drag when it carries raw image data or at least one local
    file whose name ends in a known image extension."""
    mime = e.mimeData()
    if mime.hasImage():
        e.acceptProposedAction()
        return
    for url in mime.urls():
        if url.toLocalFile().lower().endswith(_IMAGE_EXTS):
            e.acceptProposedAction()
            return
|
||||||
|
|
||||||
|
def dropEvent(self, e):
    """Attach a dropped image as the reference photo.

    The first dropped local file with an image extension wins. If there is
    none but the drop carries raw image data, that data is saved to a temp PNG
    instead. A drop that was consumed is accepted so the drag source is told
    it was taken. When an image is pending afterwards and the input box is
    blank, the box is pre-filled with the default request.
    """
    md = e.mimeData()
    handled = False
    for u in md.urls():
        p = u.toLocalFile()
        if p.lower().endswith(_IMAGE_EXTS):
            self._set_image(p)
            handled = True
            break
    else:
        if md.hasImage():
            self._save_clipboard_image(md.imageData())
            handled = True
    if handled:
        # Report the drop as taken, mirroring the accept in dragEnterEvent.
        e.acceptProposedAction()
    if self._pending_image and not self.input.text().strip():
        self.input.setText("build something like this")
|
||||||
|
|
||||||
|
def _save_clipboard_image(self, qimage) -> None:
    """Write *qimage* to a temp PNG and attach it as the reference photo.

    Does nothing for a ``None`` or null image. If the image cannot be saved,
    the freshly created temp file is removed again and nothing is attached.
    """
    import tempfile
    if qimage is None or qimage.isNull():
        return
    fd, path = tempfile.mkstemp(suffix=".png", prefix="woodshop-paste-")
    os.close(fd)  # qimage.save() reopens the file by path
    if qimage.save(path, "PNG"):
        self._set_image(path)
        return
    # Save failed: don't leave the empty placeholder file behind.
    try:
        os.remove(path)
    except OSError:
        pass
|
||||||
|
|
||||||
|
def keyPressEvent(self, e):
    """On the Paste shortcut, attach the clipboard image if there is one;
    every other key press (or a paste without an image) goes to the base class."""
    # NOTE(review): this handler only sees keys that reach the bar itself;
    # presumably a focused child such as the input box handles Ctrl+V first —
    # confirm that image paste works while typing.
    pasted = QApplication.clipboard().image() if e.matches(QKeySequence.Paste) else None
    if pasted is None or pasted.isNull():
        super().keyPressEvent(e)
        return
    self._save_clipboard_image(pasted)
|
||||||
|
|
||||||
# ----- logging -----------------------------------------------------
|
# ----- logging -----------------------------------------------------
|
||||||
def _log(self, who: str, text: str) -> None:
|
def _log(self, who: str, text: str) -> None:
|
||||||
if not text:
|
if not text:
|
||||||
|
|
@ -65,26 +145,34 @@ class CommandBar(QWidget):
|
||||||
def _busy(self, on: bool, msg: str = "") -> None:
|
def _busy(self, on: bool, msg: str = "") -> None:
|
||||||
self.input.setEnabled(not on)
|
self.input.setEnabled(not on)
|
||||||
self.mic.setEnabled(not on)
|
self.mic.setEnabled(not on)
|
||||||
|
self.attach.setEnabled(not on)
|
||||||
self.status.setText(msg)
|
self.status.setText(msg)
|
||||||
|
|
||||||
# ----- send typed/spoken command -----------------------------------
|
# ----- send typed/spoken command -----------------------------------
|
||||||
def _send(self) -> None:
|
def _send(self) -> None:
|
||||||
text = self.input.text().strip()
|
text = self.input.text().strip()
|
||||||
if not text:
|
if not text and not self._pending_image:
|
||||||
return
|
return
|
||||||
self.input.clear()
|
self.input.clear()
|
||||||
self._run(text)
|
self._run(text or "build something like this")
|
||||||
|
|
||||||
def submit(self, text: str) -> None:
|
def submit(self, text: str) -> None:
|
||||||
"""Run a command programmatically (e.g. from a Build-menu template)."""
|
"""Run a command programmatically (e.g. from a Build-menu template)."""
|
||||||
self._run(text)
|
self._run(text)
|
||||||
|
|
||||||
def _run(self, text: str) -> None:
|
def _run(self, text: str) -> None:
|
||||||
self._log("you", text)
|
image = self._pending_image
|
||||||
self._busy(True, "thinking…")
|
url = None if image else driver.find_image_url(text)
|
||||||
|
note = " 📎 photo" if (image or url) else ""
|
||||||
|
self._log("you", text + note)
|
||||||
|
self._set_image(None)
|
||||||
|
self._busy(True, "looking…" if (image or url) else "thinking…")
|
||||||
|
|
||||||
def work():
|
def work():
|
||||||
return self.c.run_command(text)
|
path = image
|
||||||
|
if path is None and url:
|
||||||
|
path = driver.fetch_image(url) # download the linked image
|
||||||
|
return self.c.run_command(text, image_path=path)
|
||||||
|
|
||||||
def done(summary):
|
def done(summary):
|
||||||
self._busy(False)
|
self._busy(False)
|
||||||
|
|
|
||||||
|
|
@ -427,9 +427,9 @@ class Controller(QObject):
|
||||||
except (SceneError, ValueError, KeyError) as exc:
|
except (SceneError, ValueError, KeyError) as exc:
|
||||||
return str(exc).strip('"')
|
return str(exc).strip('"')
|
||||||
|
|
||||||
def run_command(self, text: str) -> str:
|
def run_command(self, text: str, image_path: str | None = None) -> str:
|
||||||
"""Interpret a spoken/typed command and apply it. Returns a spoken summary.
|
"""Interpret a spoken/typed command (optionally with a reference photo) and
|
||||||
(Slow — call from a worker thread.)"""
|
apply it. Returns a spoken summary. (Slow — call from a worker thread.)"""
|
||||||
from ..scene import spatial_summary
|
from ..scene import spatial_summary
|
||||||
self.save() # ensure disk reflects current state
|
self.save() # ensure disk reflects current state
|
||||||
sel = ", ".join(self.selected) if self.selected else "none"
|
sel = ", ".join(self.selected) if self.selected else "none"
|
||||||
|
|
@ -437,7 +437,7 @@ class Controller(QObject):
|
||||||
+ f"\nCurrently selected ('these' / 'them' / 'the selected'): {sel}"
|
+ f"\nCurrently selected ('these' / 'them' / 'the selected'): {sel}"
|
||||||
+ "\n" + spatial_summary(self.scene))
|
+ "\n" + spatial_summary(self.scene))
|
||||||
calls = driver.interpret(text, self.schemas(), scene_text=scene_text,
|
calls = driver.interpret(text, self.schemas(), scene_text=scene_text,
|
||||||
history=self._history)
|
history=self._history, image_path=image_path)
|
||||||
messages = driver.dispatch(calls, verbose=False, executor=self.execute_call)
|
messages = driver.dispatch(calls, verbose=False, executor=self.execute_call)
|
||||||
self._commit()
|
self._commit()
|
||||||
spoken = driver.summarize(calls, messages)
|
spoken = driver.summarize(calls, messages)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
"""Offscreen smoke tests for the command bar's image attachment."""
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
|
||||||
|
pytest.importorskip("PySide6")
|
||||||
|
|
||||||
|
from PySide6.QtCore import QThreadPool # noqa: E402
|
||||||
|
from PySide6.QtWidgets import QApplication # noqa: E402
|
||||||
|
|
||||||
|
from woodshop.gui.command_bar import CommandBar # noqa: E402
|
||||||
|
from woodshop.gui.controller import Controller # noqa: E402
|
||||||
|
|
||||||
|
_app = QApplication.instance() or QApplication([])
|
||||||
|
|
||||||
|
|
||||||
|
def test_attach_sets_pending_and_chip(tmp_path):
    """_set_image() records the path and names the file in the chip; passing
    None clears both again."""
    controller = Controller(str(tmp_path / "s.json"))
    bar = CommandBar(controller, QThreadPool.globalInstance())
    photo = tmp_path / "chair.png"
    photo.write_bytes(b"\x89PNG")

    bar._set_image(str(photo))
    assert bar._pending_image == str(photo)
    assert "chair.png" in bar.image_chip.text()

    bar._set_image(None)
    assert bar._pending_image is None
    assert bar.image_chip.text() == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_send_with_only_image_uses_default_text(tmp_path, monkeypatch):
    """Sending with an attached photo but an empty input box falls back to the
    default request text."""
    controller = Controller(str(tmp_path / "s.json"))
    bar = CommandBar(controller, QThreadPool.globalInstance())
    sent = []
    # Replace _run so nothing is actually interpreted; just record the text.
    monkeypatch.setattr(bar, "_run", sent.append)

    bar._set_image(str(tmp_path / "x.png"))
    bar.input.clear()
    bar._send()

    assert sent[0] == "build something like this"
|
||||||
|
|
@ -146,3 +146,44 @@ def test_woodshop_cmd_falls_back_to_module(monkeypatch):
|
||||||
monkeypatch.setattr(driver.shutil, "which", lambda name: None)
|
monkeypatch.setattr(driver.shutil, "which", lambda name: None)
|
||||||
cmd = driver.woodshop_cmd()
|
cmd = driver.woodshop_cmd()
|
||||||
assert cmd[1:] == ["-m", "woodshop"] and cmd[0] # python -m woodshop
|
assert cmd[1:] == ["-m", "woodshop"] and cmd[0] # python -m woodshop
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_image_url():
    """find_image_url() picks out an image link (any extension case) and
    returns None when the text has no image link."""
    expectations = [
        ("build like this https://x.com/chair.jpg please", "https://x.com/chair.jpg"),
        ("https://x.com/a.PNG", "https://x.com/a.PNG"),
        ("no image here http://x.com/page", None),
    ]
    for text, expected in expectations:
        found = driver.find_image_url(text)
        if expected is None:
            assert found is None
        else:
            assert found == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_includes_image_directive(monkeypatch, tmp_path):
    """interpret(image_path=...) sends a prompt containing the reference-photo
    directive and the image's path."""
    prompts = []

    def fake_run(cmd, stdin=""):
        # Stand-in for the model call: keep the prompt, answer with no calls.
        prompts.append(stdin)
        return "[]"

    photo = tmp_path / "ref.jpg"
    photo.write_bytes(b"\xff\xd8\xff")  # not a real jpeg, just a path
    monkeypatch.setattr(driver, "_run", fake_run)

    driver.interpret("build something like this", schemas="[]", scene_text="empty",
                     image_path=str(photo))

    prompt = prompts[-1]
    assert "REFERENCE PHOTO" in prompt
    assert str(photo) in prompt
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_image_writes_temp(monkeypatch):
    """fetch_image() writes the response body to a temp file whose suffix
    matches the Content-Type."""
    import os as _os

    class FakeResp:
        """Minimal stand-in for the urlopen() response context manager."""
        headers = {"Content-Type": "image/png"}

        def __enter__(self):
            return self

        def __exit__(self, *a):
            return False

        def read(self):
            return b"\x89PNG\r\n\x1a\n"

    monkeypatch.setattr(driver.urllib.request, "urlopen", lambda *a, **k: FakeResp())
    path = driver.fetch_image("https://x.com/chair.png")
    try:
        assert path.endswith(".png")
        with open(path, "rb") as f:
            assert f.read().startswith(b"\x89PNG")
    finally:
        # Remove the temp file even when an assertion above fails.
        _os.remove(path)
|
||||||
|
|
|
||||||
|
|
@ -145,7 +145,7 @@ def test_run_command_threads_history(tmp_path, monkeypatch):
|
||||||
c = _controller(tmp_path)
|
c = _controller(tmp_path)
|
||||||
seen = {}
|
seen = {}
|
||||||
|
|
||||||
def fake_interpret(text, schemas, scene_text=None, history=None):
|
def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None):
|
||||||
seen["history"] = list(history or [])
|
seen["history"] = list(history or [])
|
||||||
return [{"tool": "say", "args": {"text": "want me to add tenons?"}}]
|
return [{"tool": "say", "args": {"text": "want me to add tenons?"}}]
|
||||||
|
|
||||||
|
|
@ -156,3 +156,16 @@ def test_run_command_threads_history(tmp_path, monkeypatch):
|
||||||
|
|
||||||
c.run_command("yes")
|
c.run_command("yes")
|
||||||
assert seen["history"] == [("build a table", "want me to add tenons?")]
|
assert seen["history"] == [("build a table", "want me to add tenons?")]
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_command_forwards_image_path(tmp_path, monkeypatch):
    """run_command(image_path=...) hands the path through to driver.interpret."""
    controller = _controller(tmp_path)
    forwarded = []

    def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None):
        # Record what the controller passed and reply with a harmless call.
        forwarded.append(image_path)
        return [{"tool": "say", "args": {"text": "ok"}}]

    monkeypatch.setattr(driver, "interpret", fake_interpret)
    controller.run_command("build like this", image_path="/tmp/ref.jpg")

    assert forwarded[-1] == "/tmp/ref.jpg"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue