Reference input now accepts PDF plans, 3D models, and web links

Extends "build something like this" beyond photos:

- driver.resolve_reference(source) routes any path/URL: image/PDF → a path
  claude -p reads directly; STL/STEP/OBJ → render_mesh() renders an isometric
  PNG (pyvista; STEP via build123d→STL) and reports the bounding box; a normal
  web URL → fetch_web_text() pulls the page's visible text.
- interpret(reference_text=) injects guide/render-dims text alongside any image
  directive; handle() + controller.run_command() + woodshop-talk --ref pass it.
- command bar: picker/drag-drop accept images + .pdf + 3D files; any pasted URL
  is resolved; resolution (download/render/fetch) runs off the UI thread.
- find_image_url→find_reference_url (any URL); fetch_image→fetch_url (generic).
- tests: URL detect, image+reference-text directives, fetch_url, web-text strip,
  resolve_reference routing per kind, real STL render (skips without GL). 220 pass.

A 3D render gives the language model EXACT proportions (plus the bounding box)
instead of a guess from a 2D photo. Honest limit: rendering needs the viewer
stack (pyvista) and working off-screen GL on your machine, and the live
round-trip through the language model still needs a human to eyeball the result.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
rob 2026-05-30 22:37:38 -03:00
parent c623ad2576
commit 84ae6d8756
6 changed files with 199 additions and 72 deletions

View File

@ -8,10 +8,12 @@ Talk to it like the Star Trek holodeck and watch furniture build itself:
> *"Build a coffee table: a four foot by two foot frame from 2x4s, with four legs 18 inches tall standing at the corners."* > *"Build a coffee table: a four foot by two foot frame from 2x4s, with four legs 18 inches tall standing at the corners."*
You can also **attach a reference photo** (📎, drag-drop, paste, or an image URL) You can also **attach a reference** (📎, drag-drop, paste, or a URL) and say
and say *"build something like this"* — WoodShop hands the image to the model and *"build something like this"*: a **photo**, a **PDF plan**, a **3D model**
builds a simplified, buildable interpretation in dimensional lumber that you then (STL/STEP/OBJ — rendered to an image, with its bounding box measured), or a
refine by voice/text. (It's an interpretation, not a measured replica.) **web-page guide** (its text is pulled). WoodShop builds a simplified, buildable
interpretation in dimensional lumber that you then refine by voice/text. (It's an
interpretation, not a measured replica.)
Each board is real dimensional lumber (a 2x4 is modeled at its true 1.5″ × 3.5″), Each board is real dimensional lumber (a 2x4 is modeled at its true 1.5″ × 3.5″),
so the result is buildable — export to **STEP** (CAD/CNC) or **STL** (3D print), so the result is buildable — export to **STEP** (CAD/CNC) or **STL** (3D print),

View File

@ -29,41 +29,107 @@ TOOL_FILTER = "wood-*" # auto-discover every wood-* tool, no hardcoded list
REASON_PROVIDER = "claude -p" # chosen for reliable structured tool-calling REASON_PROVIDER = "claude -p" # chosen for reliable structured tool-calling
_MAX_HISTORY = 6 # turns of recent conversation fed back for reference-resolution _MAX_HISTORY = 6 # turns of recent conversation fed back for reference-resolution
# A reference photo can be attached to "build something like this". claude -p # A reference can be attached to "build something like this": a photo, a PDF
# reads the image file (via its Read tool), so we just hand it an absolute path. # plan, a 3D model (STL/STEP/OBJ — rendered to an image first), or a web page
_IMG_URL = re.compile(r'https?://\S+?\.(?:png|jpe?g|webp|gif|bmp)\b', re.I) # (its text is pulled). claude -p reads images & PDFs directly via its Read tool.
IMG_EXTS = {".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp"}
DOC_EXTS = {".pdf"} # claude -p reads PDFs too
MESH_EXTS = {".stl", ".obj", ".ply", ".step", ".stp"}
_REF_EXTS = IMG_EXTS | DOC_EXTS | MESH_EXTS
_URL = re.compile(r'https?://\S+', re.I)
_IMAGE_DIRECTIVE = ( _IMAGE_DIRECTIVE = (
"A REFERENCE PHOTO of furniture is saved at this path:\n {path}\n" "A REFERENCE (photo / plan drawing / 3D render) is saved at this path:\n {path}\n"
"Open and look at that image file. The user wants to build something LIKE it " "Open and look at that file. The user wants to build something LIKE it from "
"from dimensional lumber and plywood. Infer the major parts, rough proportions, " "dimensional lumber and plywood. Infer the major parts, proportions, and "
"and joinery, and emit the tool calls to build a SIMPLIFIED, buildable version " "joinery, and emit the tool calls to build a SIMPLIFIED, buildable version with "
"with reasonable real dimensions in inches. This is an interpretation, not an " "reasonable real dimensions in inches. An interpretation, not an exact replica "
"exact replica — prefer standard stock sizes and right angles.\n\n" "— prefer standard stock sizes and right angles.\n\n")
) _TEXT_DIRECTIVE = (
"A build GUIDE / plan was provided as text (below). Use it to build a "
"simplified, buildable version in dimensional lumber.\n--- REFERENCE ---\n"
"{text}\n--- END REFERENCE ---\n\n")
def find_image_url(text: str) -> str | None: def find_reference_url(text: str) -> str | None:
m = _IMG_URL.search(text or "") m = _URL.search(text or "")
return m.group(0) if m else None return m.group(0) if m else None
def fetch_image(url: str, timeout: int = 20) -> str: def _ext(name: str) -> str:
"""Download an image URL to a temp file and return its path. Raises on return os.path.splitext(name.split("?")[0])[1].lower()
failure (caller decides how to surface it)."""
def fetch_url(url: str, timeout: int = 20) -> str:
"""Download a URL (image / PDF / 3D file) to a temp file; return its path."""
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
with urllib.request.urlopen(req, timeout=timeout) as resp: with urllib.request.urlopen(req, timeout=timeout) as resp:
ctype = (resp.headers.get("Content-Type") or "").split(";")[0].strip().lower() ctype = (resp.headers.get("Content-Type") or "").split(";")[0].strip().lower()
data = resp.read() data = resp.read()
ext = {"image/png": ".png", "image/jpeg": ".jpg", "image/webp": ".webp", ext = {"image/png": ".png", "image/jpeg": ".jpg", "image/webp": ".webp",
"image/gif": ".gif", "image/bmp": ".bmp"}.get(ctype) "image/gif": ".gif", "image/bmp": ".bmp", "application/pdf": ".pdf",
if ext is None: "model/stl": ".stl", "application/sla": ".stl"}.get(ctype) or _ext(url) or ".bin"
m = re.search(r'\.(png|jpe?g|webp|gif|bmp)\b', url, re.I)
ext = "." + m.group(1).lower() if m else ".img"
fd, path = tempfile.mkstemp(suffix=ext, prefix="woodshop-ref-") fd, path = tempfile.mkstemp(suffix=ext, prefix="woodshop-ref-")
with os.fdopen(fd, "wb") as f: with os.fdopen(fd, "wb") as f:
f.write(data) f.write(data)
return path return path
def fetch_web_text(url: str, limit: int = 8000, timeout: int = 20) -> str:
    """Fetch a web page and return its visible text (tags/scripts stripped).

    The body is decoded with the charset announced in the response's
    Content-Type header, falling back to UTF-8 (undecodable bytes are
    replaced). ``<script>`` and ``<style>`` elements are dropped together with
    their contents, every remaining tag is removed, HTML entities are decoded,
    and runs of whitespace collapse to a single space.

    Args:
        url: Address of the page to fetch.
        limit: Maximum number of characters to return.
        timeout: Timeout in seconds handed to ``urllib.request.urlopen``.

    Returns:
        The page text, truncated to ``limit`` characters.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when the fetch fails.
    """
    from html import unescape  # function-scope import keeps this block self-contained

    req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        raw = resp.read()
        # Response-like objects without headers (e.g. minimal stand-ins) are
        # tolerated; they simply get the UTF-8 default below.
        headers = getattr(resp, "headers", None)

    charset = None
    if headers is not None and hasattr(headers, "get_content_charset"):
        charset = headers.get_content_charset()
    try:
        markup = raw.decode(charset or "utf-8", "replace")
    except LookupError:
        # The server announced an encoding Python does not know.
        markup = raw.decode("utf-8", "replace")

    markup = re.sub(r'(?is)<(script|style)[^>]*>.*?</\1>', ' ', markup)
    text = re.sub(r'(?s)<[^>]+>', ' ', markup)
    # Decode entities only after the tags are gone, so "&lt;b&gt;" stays text,
    # and before collapsing whitespace, so "&nbsp;" is folded like any space.
    text = unescape(text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text[:limit]
def _discard_file(path: str | None) -> None:
    """Best-effort removal of a temporary file; a missing file is not an error."""
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def render_mesh(path: str) -> tuple[str, str]:
    """Render a 3D model (STL/OBJ/PLY/STEP) to a PNG and describe its bounding box.

    STEP/STP files are first converted to a temporary STL with build123d; that
    intermediate file is removed once the mesh has been read. The mesh is drawn
    off-screen from an isometric viewpoint and saved to a fresh temporary PNG,
    which the caller owns.

    Needs the viewer stack (pyvista); STEP also needs build123d.

    Args:
        path: Path to the model file; the suffix selects the loader.

    Returns:
        ``(png_path, dims_text)`` where ``dims_text`` states the bounding-box
        extents in the file's own units.

    Raises:
        ImportError: if pyvista (or build123d, for STEP) is not installed.
        Exception: whatever the loader or renderer raises; the temporary PNG
            is removed and the plotter closed before the error propagates.
    """
    import tempfile as _tf
    from pathlib import Path as _P

    import pyvista as pv

    if _P(path).suffix.lower() in (".step", ".stp"):
        from build123d import export_stl, import_step

        shape = import_step(path)
        fd, stl = _tf.mkstemp(suffix=".stl")
        os.close(fd)
        try:
            export_stl(shape, stl)
            mesh = pv.read(stl)
        finally:
            # The STL is only a conversion intermediate; don't leave it in /tmp.
            _discard_file(stl)
    else:
        mesh = pv.read(path)

    x_min, x_max, y_min, y_max, z_min, z_max = mesh.bounds
    dx, dy, dz = x_max - x_min, y_max - y_min, z_max - z_min

    pl = pv.Plotter(off_screen=True, window_size=(900, 700))
    png = None
    try:
        pl.add_mesh(mesh, color="#c8965a", show_edges=True)
        pl.view_isometric()
        fd, png = _tf.mkstemp(suffix=".png", prefix="woodshop-render-")
        os.close(fd)
        pl.screenshot(png)
    except Exception:
        _discard_file(png)  # no half-written render left behind
        raise
    finally:
        pl.close()  # always release the off-screen render window

    dims = (f"This is a render of a 3D model; its bounding box is about "
            f"{dx:.1f} x {dy:.1f} x {dz:.1f} in the file's units (proportions are "
            f"exact — treat units as inches unless that's implausible).")
    return png, dims
def resolve_reference(source: str) -> tuple[str | None, str | None]:
    """Turn a reference (local path or URL) into (image_path, reference_text).

    Routing, by kind:
      * URL without a known reference extension -> ``(None, page_text)`` via
        ``fetch_web_text`` (a web-page guide).
      * Image or PDF (local, or downloaded with ``fetch_url``) ->
        ``(path, None)``; the file is read directly downstream.
      * 3D file -> ``render_mesh`` result, i.e. ``(png_path, dims_text)``.

    Args:
        source: A local file path or an ``http(s)://`` URL. Surrounding
            whitespace is ignored and the scheme is matched case-insensitively,
            in line with the case-insensitive URL detector.

    Returns:
        ``(image_path, reference_text)``; either element may be ``None``.

    Raises:
        Whatever the download or render step raises on failure.
    """
    from urllib.parse import urldefrag

    source = source.strip()
    is_url = source.lower().startswith(("http://", "https://"))

    if is_url:
        # A "#fragment" is never sent to the server; drop it so it cannot hide
        # the file extension from _ext() or leak into the temp-file suffix.
        target = urldefrag(source).url
        if _ext(target) not in _REF_EXTS:
            return None, fetch_web_text(source)  # a web-page guide
        local = fetch_url(target)
    else:
        local = source

    if _ext(local) not in MESH_EXTS:
        return local, None  # image or PDF — read directly

    try:
        return render_mesh(local)  # (png, dims)
    finally:
        if is_url:
            # The downloaded mesh was only input for the render; the caller
            # receives the PNG, so the download would otherwise be orphaned.
            try:
                os.remove(local)
            except OSError:
                pass
# A board placed earlier in the SAME utterance is referenced as $1, $2, ... # A board placed earlier in the SAME utterance is referenced as $1, $2, ...
_SYMBOL = re.compile(r"\$(\d+)") _SYMBOL = re.compile(r"\$(\d+)")
@ -182,12 +248,16 @@ def _render_history(history: list[tuple[str, str]] | None) -> str:
def interpret(utterance: str, schemas: str, scene_text: str | None = None, def interpret(utterance: str, schemas: str, scene_text: str | None = None,
history: list[tuple[str, str]] | None = None, history: list[tuple[str, str]] | None = None,
image_path: str | None = None) -> list[dict]: image_path: str | None = None, reference_text: str | None = None) -> list[dict]:
scene = scene_text if scene_text is not None else scene_summary() scene = scene_text if scene_text is not None else scene_summary()
prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance, prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance,
history=_render_history(history)) history=_render_history(history))
prefix = ""
if image_path: if image_path:
prompt = _IMAGE_DIRECTIVE.format(path=os.path.abspath(image_path)) + prompt prefix += _IMAGE_DIRECTIVE.format(path=os.path.abspath(image_path))
if reference_text:
prefix += _TEXT_DIRECTIVE.format(text=reference_text[:8000])
prompt = prefix + prompt
raw = _run(REASON_PROVIDER.split(), stdin=prompt) raw = _run(REASON_PROVIDER.split(), stdin=prompt)
calls = _extract_calls(raw) calls = _extract_calls(raw)
if calls is None: if calls is None:
@ -284,8 +354,9 @@ def summarize(calls: list[dict], messages: list[str]) -> str:
def handle(utterance: str, schemas: str, voice: bool, verbose: bool, def handle(utterance: str, schemas: str, voice: bool, verbose: bool,
history: list[tuple[str, str]] | None = None, history: list[tuple[str, str]] | None = None,
image_path: str | None = None) -> None: image_path: str | None = None, reference_text: str | None = None) -> None:
calls = interpret(utterance, schemas, history=history, image_path=image_path) calls = interpret(utterance, schemas, history=history, image_path=image_path,
reference_text=reference_text)
messages = dispatch(calls, verbose=verbose) messages = dispatch(calls, verbose=verbose)
full = " ".join(m for m in messages if m).strip() full = " ".join(m for m in messages if m).strip()
spoken = summarize(calls, messages) spoken = summarize(calls, messages)
@ -313,13 +384,15 @@ def main(argv: list[str] | None = None) -> int:
ap.add_argument("--voice", action="store_true", help="Listen on the mic instead of typing") ap.add_argument("--voice", action="store_true", help="Listen on the mic instead of typing")
ap.add_argument("--duration", type=int, default=6, help="Mic recording seconds (--voice)") ap.add_argument("--duration", type=int, default=6, help="Mic recording seconds (--voice)")
ap.add_argument("--once", help="Run a single command (non-interactive) and exit") ap.add_argument("--once", help="Run a single command (non-interactive) and exit")
ap.add_argument("--image", help="Reference photo (path or URL) for 'build something like this'") ap.add_argument("--image", "--ref", dest="ref",
help="Reference for 'build something like this': a photo, PDF, 3D "
"model (stl/step/obj), or web-page URL (path or URL)")
ap.add_argument("--quiet", action="store_true", help="Don't print per-call detail") ap.add_argument("--quiet", action="store_true", help="Don't print per-call detail")
args = ap.parse_args(argv) args = ap.parse_args(argv)
image_path = None image_path = reference_text = None
if args.image: if args.ref:
image_path = fetch_image(args.image) if args.image.startswith("http") else args.image image_path, reference_text = resolve_reference(args.ref)
schemas = load_schemas() schemas = load_schemas()
if not schemas: if not schemas:
@ -329,7 +402,7 @@ def main(argv: list[str] | None = None) -> int:
if args.once is not None: if args.once is not None:
handle(args.once, schemas, voice=args.voice, verbose=not args.quiet, handle(args.once, schemas, voice=args.voice, verbose=not args.quiet,
image_path=image_path) image_path=image_path, reference_text=reference_text)
return 0 return 0
print("WoodShop ready. Say things like 'place a 6 foot 2x4'. Ctrl-C to quit.") print("WoodShop ready. Say things like 'place a 6 foot 2x4'. Ctrl-C to quit.")
@ -343,8 +416,8 @@ def main(argv: list[str] | None = None) -> int:
return 0 return 0
try: try:
handle(utterance, schemas, voice=args.voice, verbose=not args.quiet, handle(utterance, schemas, voice=args.voice, verbose=not args.quiet,
history=history, image_path=image_path) history=history, image_path=image_path, reference_text=reference_text)
image_path = None # the reference photo applies to the first turn only image_path = reference_text = None # the reference applies to the first turn only
except Exception as exc: # never let one bad command kill the session except Exception as exc: # never let one bad command kill the session
print(f"WoodShop: sorry, that command failed ({exc}).") print(f"WoodShop: sorry, that command failed ({exc}).")

View File

@ -17,7 +17,8 @@ from .controller import Controller
from .workers import run_async from .workers import run_async
_WHO_COLOR = {"you": "#9cdcfe", "ws": "#c8965a", "sys": "#e06c75"} _WHO_COLOR = {"you": "#9cdcfe", "ws": "#c8965a", "sys": "#e06c75"}
_IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp") # Reference files we accept by drag-drop / picker (images, PDF plans, 3D models).
_REF_EXTS = tuple(sorted(driver.IMG_EXTS | driver.DOC_EXTS | driver.MESH_EXTS))
class CommandBar(QWidget): class CommandBar(QWidget):
@ -41,7 +42,8 @@ class CommandBar(QWidget):
row.addWidget(self.mic) row.addWidget(self.mic)
self.attach = QPushButton("📎") self.attach = QPushButton("📎")
self.attach.setToolTip("Attach a reference photo — then say 'build something like this'") self.attach.setToolTip("Attach a reference (photo, PDF plan, or 3D model) — "
"then say 'build something like this'")
self.attach.setFixedWidth(40) self.attach.setFixedWidth(40)
self.attach.clicked.connect(self._attach_image) self.attach.clicked.connect(self._attach_image)
row.addWidget(self.attach) row.addWidget(self.attach)
@ -87,9 +89,10 @@ class CommandBar(QWidget):
super().mousePressEvent(e) super().mousePressEvent(e)
def _attach_image(self) -> None: def _attach_image(self) -> None:
patterns = " ".join("*" + e for e in _REF_EXTS)
path, _ = QFileDialog.getOpenFileName( path, _ = QFileDialog.getOpenFileName(
self, "Attach reference photo", "", self, "Attach reference (photo / PDF plan / 3D model)", "",
"Images (*.png *.jpg *.jpeg *.webp *.gif *.bmp)") f"References ({patterns});;All files (*)")
if path: if path:
self._set_image(path) self._set_image(path)
if not self.input.text().strip(): if not self.input.text().strip():
@ -97,7 +100,7 @@ class CommandBar(QWidget):
def dragEnterEvent(self, e): def dragEnterEvent(self, e):
md = e.mimeData() md = e.mimeData()
if md.hasImage() or any(u.toLocalFile().lower().endswith(_IMAGE_EXTS) if md.hasImage() or any(u.toLocalFile().lower().endswith(_REF_EXTS)
for u in md.urls()): for u in md.urls()):
e.acceptProposedAction() e.acceptProposedAction()
@ -105,7 +108,7 @@ class CommandBar(QWidget):
md = e.mimeData() md = e.mimeData()
for u in md.urls(): for u in md.urls():
p = u.toLocalFile() p = u.toLocalFile()
if p.lower().endswith(_IMAGE_EXTS): if p.lower().endswith(_REF_EXTS):
self._set_image(p) self._set_image(p)
break break
else: else:
@ -161,18 +164,18 @@ class CommandBar(QWidget):
self._run(text) self._run(text)
def _run(self, text: str) -> None: def _run(self, text: str) -> None:
image = self._pending_image source = self._pending_image or driver.find_reference_url(text)
url = None if image else driver.find_image_url(text) note = " 📎 reference" if source else ""
note = " 📎 photo" if (image or url) else ""
self._log("you", text + note) self._log("you", text + note)
self._set_image(None) self._set_image(None)
self._busy(True, "looking…" if (image or url) else "thinking…") self._busy(True, "studying reference…" if source else "thinking…")
def work(): def work():
path = image image_path = reference_text = None
if path is None and url: if source:
path = driver.fetch_image(url) # download the linked image image_path, reference_text = driver.resolve_reference(source)
return self.c.run_command(text, image_path=path) return self.c.run_command(text, image_path=image_path,
reference_text=reference_text)
def done(summary): def done(summary):
self._busy(False) self._busy(False)

View File

@ -427,9 +427,11 @@ class Controller(QObject):
except (SceneError, ValueError, KeyError) as exc: except (SceneError, ValueError, KeyError) as exc:
return str(exc).strip('"') return str(exc).strip('"')
def run_command(self, text: str, image_path: str | None = None) -> str: def run_command(self, text: str, image_path: str | None = None,
"""Interpret a spoken/typed command (optionally with a reference photo) and reference_text: str | None = None) -> str:
apply it. Returns a spoken summary. (Slow call from a worker thread.)""" """Interpret a spoken/typed command (optionally with a reference photo,
plan, 3D render, or guide text) and apply it. Returns a spoken summary.
(Slow call from a worker thread.)"""
from ..scene import spatial_summary from ..scene import spatial_summary
self.save() # ensure disk reflects current state self.save() # ensure disk reflects current state
sel = ", ".join(self.selected) if self.selected else "none" sel = ", ".join(self.selected) if self.selected else "none"
@ -437,7 +439,8 @@ class Controller(QObject):
+ f"\nCurrently selected ('these' / 'them' / 'the selected'): {sel}" + f"\nCurrently selected ('these' / 'them' / 'the selected'): {sel}"
+ "\n" + spatial_summary(self.scene)) + "\n" + spatial_summary(self.scene))
calls = driver.interpret(text, self.schemas(), scene_text=scene_text, calls = driver.interpret(text, self.schemas(), scene_text=scene_text,
history=self._history, image_path=image_path) history=self._history, image_path=image_path,
reference_text=reference_text)
messages = driver.dispatch(calls, verbose=False, executor=self.execute_call) messages = driver.dispatch(calls, verbose=False, executor=self.execute_call)
self._commit() self._commit()
spoken = driver.summarize(calls, messages) spoken = driver.summarize(calls, messages)

View File

@ -1,6 +1,8 @@
"""Tests for the driver's orchestration logic (external tools are mocked).""" """Tests for the driver's orchestration logic (external tools are mocked)."""
import json import json
import pytest
from woodshop import driver from woodshop import driver
from woodshop.cli import normalize_anchor from woodshop.cli import normalize_anchor
@ -148,32 +150,33 @@ def test_woodshop_cmd_falls_back_to_module(monkeypatch):
assert cmd[1:] == ["-m", "woodshop"] and cmd[0] # python -m woodshop assert cmd[1:] == ["-m", "woodshop"] and cmd[0] # python -m woodshop
def test_find_image_url(): def test_find_reference_url():
assert driver.find_image_url("build like this https://x.com/chair.jpg please") \ assert driver.find_reference_url("build like this https://x.com/chair.jpg please") \
== "https://x.com/chair.jpg" == "https://x.com/chair.jpg"
assert driver.find_image_url("https://x.com/a.PNG") == "https://x.com/a.PNG" assert driver.find_reference_url("see https://x.com/how-to") == "https://x.com/how-to"
assert driver.find_image_url("no image here http://x.com/page") is None assert driver.find_reference_url("no url here") is None
def test_interpret_includes_image_directive(monkeypatch, tmp_path): def test_interpret_includes_image_directive(monkeypatch, tmp_path):
captured = {} captured = {}
monkeypatch.setattr(driver, "_run", lambda cmd, stdin="": captured.update(prompt=stdin) or "[]")
def fake_run(cmd, stdin=""):
captured["prompt"] = stdin
return "[]"
img = tmp_path / "ref.jpg" img = tmp_path / "ref.jpg"
img.write_bytes(b"\xff\xd8\xff") # not a real jpeg, just a path img.write_bytes(b"\xff\xd8\xff")
monkeypatch.setattr(driver, "_run", fake_run)
driver.interpret("build something like this", schemas="[]", scene_text="empty", driver.interpret("build something like this", schemas="[]", scene_text="empty",
image_path=str(img)) image_path=str(img))
assert "REFERENCE PHOTO" in captured["prompt"] assert "REFERENCE" in captured["prompt"] and str(img) in captured["prompt"]
assert str(img) in captured["prompt"]
def test_fetch_image_writes_temp(monkeypatch): def test_interpret_includes_reference_text(monkeypatch):
import io captured = {}
monkeypatch.setattr(driver, "_run", lambda cmd, stdin="": captured.update(prompt=stdin) or "[]")
driver.interpret("build it", schemas="[]", scene_text="empty",
reference_text="Step 1: cut four legs 28 inches long.")
assert "build GUIDE" in captured["prompt"]
assert "cut four legs 28 inches" in captured["prompt"]
def test_fetch_url_writes_temp(monkeypatch):
class FakeResp: class FakeResp:
headers = {"Content-Type": "image/png"} headers = {"Content-Type": "image/png"}
def __enter__(self): return self def __enter__(self): return self
@ -181,9 +184,52 @@ def test_fetch_image_writes_temp(monkeypatch):
def read(self): return b"\x89PNG\r\n\x1a\n" def read(self): return b"\x89PNG\r\n\x1a\n"
monkeypatch.setattr(driver.urllib.request, "urlopen", lambda *a, **k: FakeResp()) monkeypatch.setattr(driver.urllib.request, "urlopen", lambda *a, **k: FakeResp())
path = driver.fetch_image("https://x.com/chair.png") path = driver.fetch_url("https://x.com/chair.png")
assert path.endswith(".png") assert path.endswith(".png")
with open(path, "rb") as f:
assert f.read().startswith(b"\x89PNG")
import os as _os import os as _os
_os.remove(path) _os.remove(path)
def test_fetch_web_text_strips_tags(monkeypatch):
    """fetch_web_text drops tags and <style> contents, keeping only the words."""
    page = b"<html><head><style>x{}</style></head><body><h1>Build</h1> a <b>shelf</b></body></html>"

    class FakeResp:
        # Minimal context-manager stand-in for the urlopen() response.
        def __enter__(self):
            return self

        def __exit__(self, *exc_info):
            return False

        def read(self):
            return page

    def fake_urlopen(*args, **kwargs):
        return FakeResp()

    monkeypatch.setattr(driver.urllib.request, "urlopen", fake_urlopen)

    result = driver.fetch_web_text("https://x.com/guide")

    assert "Build a shelf" in result
    assert "<" not in result
    assert "x{}" not in result
def test_resolve_reference_routes_by_kind(monkeypatch, tmp_path):
    """resolve_reference picks the right handler for each kind of reference."""
    # A local image is handed back untouched, with no reference text.
    image_file = tmp_path / "a.png"
    image_file.write_bytes(b"x")
    assert driver.resolve_reference(str(image_file)) == (str(image_file), None)

    # A local PDF is likewise read directly.
    pdf_file = tmp_path / "plan.pdf"
    pdf_file.write_bytes(b"%PDF")
    assert driver.resolve_reference(str(pdf_file)) == (str(pdf_file), None)

    # A plain web-page URL yields text only.
    def fake_web_text(url, **kwargs):
        return "guide text"

    monkeypatch.setattr(driver, "fetch_web_text", fake_web_text)
    assert driver.resolve_reference("https://x.com/how-to-build") == (None, "guide text")

    # A 3D file goes through the (mocked) renderer and yields (png, dims).
    def fake_render(path):
        return ("/tmp/r.png", "bbox 10x10x10")

    monkeypatch.setattr(driver, "render_mesh", fake_render)
    mesh_file = tmp_path / "m.stl"
    mesh_file.write_bytes(b"solid")
    assert driver.resolve_reference(str(mesh_file)) == ("/tmp/r.png", "bbox 10x10x10")
def test_render_mesh_real_if_possible(tmp_path):
    """Exercise render_mesh on a real STL when the environment allows it.

    Skips when pyvista is not installed, and also when writing the cube or
    rendering it raises (headless boxes often lack off-screen GL).
    """
    import os as _os

    pv = pytest.importorskip("pyvista")
    cube_path = tmp_path / "box.stl"
    try:
        pv.Cube().save(str(cube_path))
        rendered, description = driver.render_mesh(str(cube_path))
    except Exception as exc:  # no GL / off-screen unsupported here
        pytest.skip(f"offscreen render unavailable: {exc}")

    assert _os.path.exists(rendered)
    assert rendered.endswith(".png")
    assert "bounding box" in description
    _os.remove(rendered)

View File

@ -145,7 +145,7 @@ def test_run_command_threads_history(tmp_path, monkeypatch):
c = _controller(tmp_path) c = _controller(tmp_path)
seen = {} seen = {}
def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None): def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None, reference_text=None):
seen["history"] = list(history or []) seen["history"] = list(history or [])
return [{"tool": "say", "args": {"text": "want me to add tenons?"}}] return [{"tool": "say", "args": {"text": "want me to add tenons?"}}]
@ -162,7 +162,7 @@ def test_run_command_forwards_image_path(tmp_path, monkeypatch):
c = _controller(tmp_path) c = _controller(tmp_path)
seen = {} seen = {}
def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None): def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None, reference_text=None):
seen["image_path"] = image_path seen["image_path"] = image_path
return [{"tool": "say", "args": {"text": "ok"}}] return [{"tool": "say", "args": {"text": "ok"}}]