Reference input now accepts PDF plans, 3D models, and web links
Extends "build something like this" beyond photos:
- driver.resolve_reference(source) routes any path/URL: image/PDF → a path claude -p reads directly; STL/STEP/OBJ → render_mesh() renders an isometric PNG (pyvista; STEP via build123d→STL) and reports the bounding box; a normal web URL → fetch_web_text() pulls the page's visible text.
- interpret(reference_text=) injects guide/render-dims text alongside any image directive; handle() + controller.run_command() + woodshop-talk --ref pass it.
- command bar: picker/drag-drop accept images + .pdf + 3D files; any pasted URL is resolved; resolution (download/render/fetch) runs off the UI thread.
- find_image_url→find_reference_url (any URL); fetch_image→fetch_url (generic).
- tests: URL detect, image+reference-text directives, fetch_url, web-text strip, resolve_reference routing per kind, real STL render (skips without GL). 220 pass.

3D render gives the model EXACT proportions (+ bbox) instead of a 2D guess. Honest limit: render needs the viewer stack + working off-screen GL on your box; the live model round-trip still wants your eyes to confirm.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c623ad2576
commit
84ae6d8756
10
README.md
10
README.md
|
|
@ -8,10 +8,12 @@ Talk to it like the Star Trek holodeck and watch furniture build itself:
|
||||||
|
|
||||||
> *"Build a coffee table: a four foot by two foot frame from 2x4s, with four legs 18 inches tall standing at the corners."*
|
> *"Build a coffee table: a four foot by two foot frame from 2x4s, with four legs 18 inches tall standing at the corners."*
|
||||||
|
|
||||||
You can also **attach a reference photo** (📎, drag-drop, paste, or an image URL)
|
You can also **attach a reference** (📎, drag-drop, paste, or a URL) and say
|
||||||
and say *"build something like this"* — WoodShop hands the image to the model and
|
*"build something like this"*: a **photo**, a **PDF plan**, a **3D model**
|
||||||
builds a simplified, buildable interpretation in dimensional lumber that you then
|
(STL/STEP/OBJ — rendered to an image, with its bounding box measured), or a
|
||||||
refine by voice/text. (It's an interpretation, not a measured replica.)
|
**web-page guide** (its text is pulled). WoodShop builds a simplified, buildable
|
||||||
|
interpretation in dimensional lumber that you then refine by voice/text. (It's an
|
||||||
|
interpretation, not a measured replica.)
|
||||||
|
|
||||||
Each board is real dimensional lumber (a 2x4 is modeled at its true 1.5″ × 3.5″),
|
Each board is real dimensional lumber (a 2x4 is modeled at its true 1.5″ × 3.5″),
|
||||||
so the result is buildable — export to **STEP** (CAD/CNC) or **STL** (3D print),
|
so the result is buildable — export to **STEP** (CAD/CNC) or **STL** (3D print),
|
||||||
|
|
|
||||||
|
|
@ -29,41 +29,107 @@ TOOL_FILTER = "wood-*" # auto-discover every wood-* tool, no hardcoded list
|
||||||
REASON_PROVIDER = "claude -p" # chosen for reliable structured tool-calling
|
REASON_PROVIDER = "claude -p" # chosen for reliable structured tool-calling
|
||||||
_MAX_HISTORY = 6 # turns of recent conversation fed back for reference-resolution
|
_MAX_HISTORY = 6 # turns of recent conversation fed back for reference-resolution
|
||||||
|
|
||||||
# A reference photo can be attached to "build something like this". claude -p
|
# A reference can be attached to "build something like this": a photo, a PDF
|
||||||
# reads the image file (via its Read tool), so we just hand it an absolute path.
|
# plan, a 3D model (STL/STEP/OBJ — rendered to an image first), or a web page
|
||||||
_IMG_URL = re.compile(r'https?://\S+?\.(?:png|jpe?g|webp|gif|bmp)\b', re.I)
|
# (its text is pulled). claude -p reads images & PDFs directly via its Read tool.
|
||||||
|
IMG_EXTS = {".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp"}
|
||||||
|
DOC_EXTS = {".pdf"} # claude -p reads PDFs too
|
||||||
|
MESH_EXTS = {".stl", ".obj", ".ply", ".step", ".stp"}
|
||||||
|
_REF_EXTS = IMG_EXTS | DOC_EXTS | MESH_EXTS
|
||||||
|
_URL = re.compile(r'https?://\S+', re.I)
|
||||||
|
|
||||||
_IMAGE_DIRECTIVE = (
|
_IMAGE_DIRECTIVE = (
|
||||||
"A REFERENCE PHOTO of furniture is saved at this path:\n {path}\n"
|
"A REFERENCE (photo / plan drawing / 3D render) is saved at this path:\n {path}\n"
|
||||||
"Open and look at that image file. The user wants to build something LIKE it "
|
"Open and look at that file. The user wants to build something LIKE it from "
|
||||||
"from dimensional lumber and plywood. Infer the major parts, rough proportions, "
|
"dimensional lumber and plywood. Infer the major parts, proportions, and "
|
||||||
"and joinery, and emit the tool calls to build a SIMPLIFIED, buildable version "
|
"joinery, and emit the tool calls to build a SIMPLIFIED, buildable version with "
|
||||||
"with reasonable real dimensions in inches. This is an interpretation, not an "
|
"reasonable real dimensions in inches. An interpretation, not an exact replica "
|
||||||
"exact replica — prefer standard stock sizes and right angles.\n\n"
|
"— prefer standard stock sizes and right angles.\n\n")
|
||||||
)
|
_TEXT_DIRECTIVE = (
|
||||||
|
"A build GUIDE / plan was provided as text (below). Use it to build a "
|
||||||
|
"simplified, buildable version in dimensional lumber.\n--- REFERENCE ---\n"
|
||||||
|
"{text}\n--- END REFERENCE ---\n\n")
|
||||||
|
|
||||||
|
|
||||||
def find_image_url(text: str) -> str | None:
|
def find_reference_url(text: str) -> str | None:
|
||||||
m = _IMG_URL.search(text or "")
|
m = _URL.search(text or "")
|
||||||
return m.group(0) if m else None
|
return m.group(0) if m else None
|
||||||
|
|
||||||
|
|
||||||
def fetch_image(url: str, timeout: int = 20) -> str:
|
def _ext(name: str) -> str:
|
||||||
"""Download an image URL to a temp file and return its path. Raises on
|
return os.path.splitext(name.split("?")[0])[1].lower()
|
||||||
failure (caller decides how to surface it)."""
|
|
||||||
|
|
||||||
|
def fetch_url(url: str, timeout: int = 20) -> str:
|
||||||
|
"""Download a URL (image / PDF / 3D file) to a temp file; return its path."""
|
||||||
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
|
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
|
||||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||||
ctype = (resp.headers.get("Content-Type") or "").split(";")[0].strip().lower()
|
ctype = (resp.headers.get("Content-Type") or "").split(";")[0].strip().lower()
|
||||||
data = resp.read()
|
data = resp.read()
|
||||||
ext = {"image/png": ".png", "image/jpeg": ".jpg", "image/webp": ".webp",
|
ext = {"image/png": ".png", "image/jpeg": ".jpg", "image/webp": ".webp",
|
||||||
"image/gif": ".gif", "image/bmp": ".bmp"}.get(ctype)
|
"image/gif": ".gif", "image/bmp": ".bmp", "application/pdf": ".pdf",
|
||||||
if ext is None:
|
"model/stl": ".stl", "application/sla": ".stl"}.get(ctype) or _ext(url) or ".bin"
|
||||||
m = re.search(r'\.(png|jpe?g|webp|gif|bmp)\b', url, re.I)
|
|
||||||
ext = "." + m.group(1).lower() if m else ".img"
|
|
||||||
fd, path = tempfile.mkstemp(suffix=ext, prefix="woodshop-ref-")
|
fd, path = tempfile.mkstemp(suffix=ext, prefix="woodshop-ref-")
|
||||||
with os.fdopen(fd, "wb") as f:
|
with os.fdopen(fd, "wb") as f:
|
||||||
f.write(data)
|
f.write(data)
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_web_text(url: str, limit: int = 8000, timeout: int = 20) -> str:
    """Fetch a web page and return its visible text.

    The body is decoded with the charset the response declares (falling back
    to UTF-8 with replacement characters when none is declared or the name is
    unknown). ``<script>``/``<style>`` elements, HTML comments and all
    remaining tags are removed, character references such as ``&amp;`` are
    turned into the characters they stand for, and whitespace runs are
    collapsed to single spaces.

    Args:
        url: Address of the page to download.
        limit: Maximum number of characters of cleaned text to return.
        timeout: Network timeout in seconds, passed to ``urlopen``.

    Returns:
        The cleaned text, truncated to ``limit`` characters.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on a failed request; the
        caller decides how to surface it.
    """
    # Imported locally so the name does not have to exist at module level.
    from html import unescape

    req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        raw = resp.read()
        # Response stand-ins (e.g. test fakes) may not expose headers at all.
        headers = getattr(resp, "headers", None)

    # Honour the declared charset; plain dict headers have no such accessor.
    get_charset = getattr(headers, "get_content_charset", None)
    charset = get_charset() if callable(get_charset) else None
    try:
        markup = raw.decode(charset or "utf-8", "replace")
    except LookupError:  # the server named an encoding Python does not know
        markup = raw.decode("utf-8", "replace")

    # Drop non-visible content first, then every remaining tag.
    markup = re.sub(r'(?is)<(script|style)[^>]*>.*?</\1>', ' ', markup)
    markup = re.sub(r'(?s)<!--.*?-->', ' ', markup)
    text = re.sub(r'(?s)<[^>]+>', ' ', markup)
    # Unescape only after tag removal so "&lt;b&gt;" stays literal text, and
    # before whitespace collapsing so "&nbsp;" is folded like any other space.
    text = unescape(text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def render_mesh(path: str) -> tuple[str, str]:
    """Render a 3D model to a PNG and describe its bounding box.

    STEP files (``.step``/``.stp``) are first converted to a temporary STL
    with build123d; every other file is handed to ``pyvista.read`` directly.
    The mesh is drawn off-screen from an isometric viewpoint and saved as a
    screenshot.

    Args:
        path: Path to the model file (STL/OBJ/PLY/STEP).

    Returns:
        ``(png_path, dims_text)``: the path of the rendered PNG (a temp file
        the caller owns) and a sentence giving the bounding-box extents in
        the file's own units.

    Raises:
        ImportError: if pyvista (or build123d, for STEP input) is missing.
        Any error raised while reading or rendering is propagated; temporary
        files created here are removed first.
    """
    import tempfile as _tf
    from pathlib import Path as _P

    import pyvista as pv

    def _discard(tmp: str | None) -> None:
        # Best-effort removal of a temp file we created.
        if tmp is not None:
            try:
                os.remove(tmp)
            except OSError:
                pass

    stl = None  # intermediate STL produced from a STEP file, if any
    try:
        if _P(path).suffix.lower() in (".step", ".stp"):
            from build123d import export_stl, import_step
            shape = import_step(path)
            fd, stl = _tf.mkstemp(suffix=".stl")
            os.close(fd)
            export_stl(shape, stl)
            mesh = pv.read(stl)
        else:
            mesh = pv.read(path)
    finally:
        # The intermediate STL is only needed until the mesh is loaded.
        _discard(stl)

    b = mesh.bounds
    dx, dy, dz = b[1] - b[0], b[3] - b[2], b[5] - b[4]

    pl = pv.Plotter(off_screen=True, window_size=(900, 700))
    png = None
    try:
        pl.add_mesh(mesh, color="#c8965a", show_edges=True)
        pl.view_isometric()
        fd, png = _tf.mkstemp(suffix=".png", prefix="woodshop-render-")
        os.close(fd)
        pl.screenshot(png)
    except Exception:
        # Do not leave an empty PNG behind when rendering fails.
        _discard(png)
        raise
    finally:
        # Always release the plotter, even on failure.
        pl.close()

    dims = (f"This is a render of a 3D model; its bounding box is about "
            f"{dx:.1f} x {dy:.1f} x {dz:.1f} in the file's units (proportions are "
            f"exact — treat units as inches unless that's implausible).")
    return png, dims
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_reference(source: str) -> tuple[str | None, str | None]:
    """Turn a reference (local path or URL) into inputs for ``interpret()``.

    Routing, decided by the extension ``_ext`` reports:
      * URL whose extension is not a known reference type -> treated as a
        web-page guide; returns ``(None, page_text)``.
      * image / PDF (local, or downloaded from a URL) -> ``(path, None)``.
      * 3D file (local, or downloaded from a URL) -> rendered; returns
        ``(png_path, dims_text)`` from ``render_mesh``.

    Args:
        source: A local file path or an http(s) URL.

    Returns:
        ``(image_path, reference_text)``; either element may be ``None``.

    Raises:
        Propagates download/render failures from the helpers it calls.
    """
    # The URL detector is case-insensitive, so accept "HTTPS://..." here too
    # instead of mistaking it for a local path.
    is_url = source.lower().startswith(("http://", "https://"))
    if is_url and _ext(source) not in _REF_EXTS:
        return None, fetch_web_text(source)      # a web-page guide

    local = fetch_url(source) if is_url else source
    if _ext(local) not in MESH_EXTS:
        return local, None                       # image or PDF — read directly

    try:
        return render_mesh(local)                # (png, dims)
    finally:
        # A mesh we downloaded is only needed for the render; never delete a
        # file the user pointed us at.
        if is_url:
            try:
                os.remove(local)
            except OSError:
                pass
|
||||||
|
|
||||||
# A board placed earlier in the SAME utterance is referenced as $1, $2, ...
|
# A board placed earlier in the SAME utterance is referenced as $1, $2, ...
|
||||||
_SYMBOL = re.compile(r"\$(\d+)")
|
_SYMBOL = re.compile(r"\$(\d+)")
|
||||||
|
|
||||||
|
|
@ -182,12 +248,16 @@ def _render_history(history: list[tuple[str, str]] | None) -> str:
|
||||||
|
|
||||||
def interpret(utterance: str, schemas: str, scene_text: str | None = None,
|
def interpret(utterance: str, schemas: str, scene_text: str | None = None,
|
||||||
history: list[tuple[str, str]] | None = None,
|
history: list[tuple[str, str]] | None = None,
|
||||||
image_path: str | None = None) -> list[dict]:
|
image_path: str | None = None, reference_text: str | None = None) -> list[dict]:
|
||||||
scene = scene_text if scene_text is not None else scene_summary()
|
scene = scene_text if scene_text is not None else scene_summary()
|
||||||
prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance,
|
prompt = SYSTEM.format(schemas=schemas, scene=scene, utterance=utterance,
|
||||||
history=_render_history(history))
|
history=_render_history(history))
|
||||||
|
prefix = ""
|
||||||
if image_path:
|
if image_path:
|
||||||
prompt = _IMAGE_DIRECTIVE.format(path=os.path.abspath(image_path)) + prompt
|
prefix += _IMAGE_DIRECTIVE.format(path=os.path.abspath(image_path))
|
||||||
|
if reference_text:
|
||||||
|
prefix += _TEXT_DIRECTIVE.format(text=reference_text[:8000])
|
||||||
|
prompt = prefix + prompt
|
||||||
raw = _run(REASON_PROVIDER.split(), stdin=prompt)
|
raw = _run(REASON_PROVIDER.split(), stdin=prompt)
|
||||||
calls = _extract_calls(raw)
|
calls = _extract_calls(raw)
|
||||||
if calls is None:
|
if calls is None:
|
||||||
|
|
@ -284,8 +354,9 @@ def summarize(calls: list[dict], messages: list[str]) -> str:
|
||||||
|
|
||||||
def handle(utterance: str, schemas: str, voice: bool, verbose: bool,
|
def handle(utterance: str, schemas: str, voice: bool, verbose: bool,
|
||||||
history: list[tuple[str, str]] | None = None,
|
history: list[tuple[str, str]] | None = None,
|
||||||
image_path: str | None = None) -> None:
|
image_path: str | None = None, reference_text: str | None = None) -> None:
|
||||||
calls = interpret(utterance, schemas, history=history, image_path=image_path)
|
calls = interpret(utterance, schemas, history=history, image_path=image_path,
|
||||||
|
reference_text=reference_text)
|
||||||
messages = dispatch(calls, verbose=verbose)
|
messages = dispatch(calls, verbose=verbose)
|
||||||
full = " ".join(m for m in messages if m).strip()
|
full = " ".join(m for m in messages if m).strip()
|
||||||
spoken = summarize(calls, messages)
|
spoken = summarize(calls, messages)
|
||||||
|
|
@ -313,13 +384,15 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
ap.add_argument("--voice", action="store_true", help="Listen on the mic instead of typing")
|
ap.add_argument("--voice", action="store_true", help="Listen on the mic instead of typing")
|
||||||
ap.add_argument("--duration", type=int, default=6, help="Mic recording seconds (--voice)")
|
ap.add_argument("--duration", type=int, default=6, help="Mic recording seconds (--voice)")
|
||||||
ap.add_argument("--once", help="Run a single command (non-interactive) and exit")
|
ap.add_argument("--once", help="Run a single command (non-interactive) and exit")
|
||||||
ap.add_argument("--image", help="Reference photo (path or URL) for 'build something like this'")
|
ap.add_argument("--image", "--ref", dest="ref",
|
||||||
|
help="Reference for 'build something like this': a photo, PDF, 3D "
|
||||||
|
"model (stl/step/obj), or web-page URL (path or URL)")
|
||||||
ap.add_argument("--quiet", action="store_true", help="Don't print per-call detail")
|
ap.add_argument("--quiet", action="store_true", help="Don't print per-call detail")
|
||||||
args = ap.parse_args(argv)
|
args = ap.parse_args(argv)
|
||||||
|
|
||||||
image_path = None
|
image_path = reference_text = None
|
||||||
if args.image:
|
if args.ref:
|
||||||
image_path = fetch_image(args.image) if args.image.startswith("http") else args.image
|
image_path, reference_text = resolve_reference(args.ref)
|
||||||
|
|
||||||
schemas = load_schemas()
|
schemas = load_schemas()
|
||||||
if not schemas:
|
if not schemas:
|
||||||
|
|
@ -329,7 +402,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
|
|
||||||
if args.once is not None:
|
if args.once is not None:
|
||||||
handle(args.once, schemas, voice=args.voice, verbose=not args.quiet,
|
handle(args.once, schemas, voice=args.voice, verbose=not args.quiet,
|
||||||
image_path=image_path)
|
image_path=image_path, reference_text=reference_text)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
print("WoodShop ready. Say things like 'place a 6 foot 2x4'. Ctrl-C to quit.")
|
print("WoodShop ready. Say things like 'place a 6 foot 2x4'. Ctrl-C to quit.")
|
||||||
|
|
@ -343,8 +416,8 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
return 0
|
return 0
|
||||||
try:
|
try:
|
||||||
handle(utterance, schemas, voice=args.voice, verbose=not args.quiet,
|
handle(utterance, schemas, voice=args.voice, verbose=not args.quiet,
|
||||||
history=history, image_path=image_path)
|
history=history, image_path=image_path, reference_text=reference_text)
|
||||||
image_path = None # the reference photo applies to the first turn only
|
image_path = reference_text = None # the reference applies to the first turn only
|
||||||
except Exception as exc: # never let one bad command kill the session
|
except Exception as exc: # never let one bad command kill the session
|
||||||
print(f"WoodShop: sorry, that command failed ({exc}).")
|
print(f"WoodShop: sorry, that command failed ({exc}).")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,8 @@ from .controller import Controller
|
||||||
from .workers import run_async
|
from .workers import run_async
|
||||||
|
|
||||||
_WHO_COLOR = {"you": "#9cdcfe", "ws": "#c8965a", "sys": "#e06c75"}
|
_WHO_COLOR = {"you": "#9cdcfe", "ws": "#c8965a", "sys": "#e06c75"}
|
||||||
_IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp")
|
# Reference files we accept by drag-drop / picker (images, PDF plans, 3D models).
|
||||||
|
_REF_EXTS = tuple(sorted(driver.IMG_EXTS | driver.DOC_EXTS | driver.MESH_EXTS))
|
||||||
|
|
||||||
|
|
||||||
class CommandBar(QWidget):
|
class CommandBar(QWidget):
|
||||||
|
|
@ -41,7 +42,8 @@ class CommandBar(QWidget):
|
||||||
row.addWidget(self.mic)
|
row.addWidget(self.mic)
|
||||||
|
|
||||||
self.attach = QPushButton("📎")
|
self.attach = QPushButton("📎")
|
||||||
self.attach.setToolTip("Attach a reference photo — then say 'build something like this'")
|
self.attach.setToolTip("Attach a reference (photo, PDF plan, or 3D model) — "
|
||||||
|
"then say 'build something like this'")
|
||||||
self.attach.setFixedWidth(40)
|
self.attach.setFixedWidth(40)
|
||||||
self.attach.clicked.connect(self._attach_image)
|
self.attach.clicked.connect(self._attach_image)
|
||||||
row.addWidget(self.attach)
|
row.addWidget(self.attach)
|
||||||
|
|
@ -87,9 +89,10 @@ class CommandBar(QWidget):
|
||||||
super().mousePressEvent(e)
|
super().mousePressEvent(e)
|
||||||
|
|
||||||
def _attach_image(self) -> None:
|
def _attach_image(self) -> None:
|
||||||
|
patterns = " ".join("*" + e for e in _REF_EXTS)
|
||||||
path, _ = QFileDialog.getOpenFileName(
|
path, _ = QFileDialog.getOpenFileName(
|
||||||
self, "Attach reference photo", "",
|
self, "Attach reference (photo / PDF plan / 3D model)", "",
|
||||||
"Images (*.png *.jpg *.jpeg *.webp *.gif *.bmp)")
|
f"References ({patterns});;All files (*)")
|
||||||
if path:
|
if path:
|
||||||
self._set_image(path)
|
self._set_image(path)
|
||||||
if not self.input.text().strip():
|
if not self.input.text().strip():
|
||||||
|
|
@ -97,7 +100,7 @@ class CommandBar(QWidget):
|
||||||
|
|
||||||
def dragEnterEvent(self, e):
|
def dragEnterEvent(self, e):
|
||||||
md = e.mimeData()
|
md = e.mimeData()
|
||||||
if md.hasImage() or any(u.toLocalFile().lower().endswith(_IMAGE_EXTS)
|
if md.hasImage() or any(u.toLocalFile().lower().endswith(_REF_EXTS)
|
||||||
for u in md.urls()):
|
for u in md.urls()):
|
||||||
e.acceptProposedAction()
|
e.acceptProposedAction()
|
||||||
|
|
||||||
|
|
@ -105,7 +108,7 @@ class CommandBar(QWidget):
|
||||||
md = e.mimeData()
|
md = e.mimeData()
|
||||||
for u in md.urls():
|
for u in md.urls():
|
||||||
p = u.toLocalFile()
|
p = u.toLocalFile()
|
||||||
if p.lower().endswith(_IMAGE_EXTS):
|
if p.lower().endswith(_REF_EXTS):
|
||||||
self._set_image(p)
|
self._set_image(p)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
|
@ -161,18 +164,18 @@ class CommandBar(QWidget):
|
||||||
self._run(text)
|
self._run(text)
|
||||||
|
|
||||||
def _run(self, text: str) -> None:
|
def _run(self, text: str) -> None:
|
||||||
image = self._pending_image
|
source = self._pending_image or driver.find_reference_url(text)
|
||||||
url = None if image else driver.find_image_url(text)
|
note = " 📎 reference" if source else ""
|
||||||
note = " 📎 photo" if (image or url) else ""
|
|
||||||
self._log("you", text + note)
|
self._log("you", text + note)
|
||||||
self._set_image(None)
|
self._set_image(None)
|
||||||
self._busy(True, "looking…" if (image or url) else "thinking…")
|
self._busy(True, "studying reference…" if source else "thinking…")
|
||||||
|
|
||||||
def work():
|
def work():
|
||||||
path = image
|
image_path = reference_text = None
|
||||||
if path is None and url:
|
if source:
|
||||||
path = driver.fetch_image(url) # download the linked image
|
image_path, reference_text = driver.resolve_reference(source)
|
||||||
return self.c.run_command(text, image_path=path)
|
return self.c.run_command(text, image_path=image_path,
|
||||||
|
reference_text=reference_text)
|
||||||
|
|
||||||
def done(summary):
|
def done(summary):
|
||||||
self._busy(False)
|
self._busy(False)
|
||||||
|
|
|
||||||
|
|
@ -427,9 +427,11 @@ class Controller(QObject):
|
||||||
except (SceneError, ValueError, KeyError) as exc:
|
except (SceneError, ValueError, KeyError) as exc:
|
||||||
return str(exc).strip('"')
|
return str(exc).strip('"')
|
||||||
|
|
||||||
def run_command(self, text: str, image_path: str | None = None) -> str:
|
def run_command(self, text: str, image_path: str | None = None,
|
||||||
"""Interpret a spoken/typed command (optionally with a reference photo) and
|
reference_text: str | None = None) -> str:
|
||||||
apply it. Returns a spoken summary. (Slow — call from a worker thread.)"""
|
"""Interpret a spoken/typed command (optionally with a reference photo,
|
||||||
|
plan, 3D render, or guide text) and apply it. Returns a spoken summary.
|
||||||
|
(Slow — call from a worker thread.)"""
|
||||||
from ..scene import spatial_summary
|
from ..scene import spatial_summary
|
||||||
self.save() # ensure disk reflects current state
|
self.save() # ensure disk reflects current state
|
||||||
sel = ", ".join(self.selected) if self.selected else "none"
|
sel = ", ".join(self.selected) if self.selected else "none"
|
||||||
|
|
@ -437,7 +439,8 @@ class Controller(QObject):
|
||||||
+ f"\nCurrently selected ('these' / 'them' / 'the selected'): {sel}"
|
+ f"\nCurrently selected ('these' / 'them' / 'the selected'): {sel}"
|
||||||
+ "\n" + spatial_summary(self.scene))
|
+ "\n" + spatial_summary(self.scene))
|
||||||
calls = driver.interpret(text, self.schemas(), scene_text=scene_text,
|
calls = driver.interpret(text, self.schemas(), scene_text=scene_text,
|
||||||
history=self._history, image_path=image_path)
|
history=self._history, image_path=image_path,
|
||||||
|
reference_text=reference_text)
|
||||||
messages = driver.dispatch(calls, verbose=False, executor=self.execute_call)
|
messages = driver.dispatch(calls, verbose=False, executor=self.execute_call)
|
||||||
self._commit()
|
self._commit()
|
||||||
spoken = driver.summarize(calls, messages)
|
spoken = driver.summarize(calls, messages)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
"""Tests for the driver's orchestration logic (external tools are mocked)."""
|
"""Tests for the driver's orchestration logic (external tools are mocked)."""
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from woodshop import driver
|
from woodshop import driver
|
||||||
from woodshop.cli import normalize_anchor
|
from woodshop.cli import normalize_anchor
|
||||||
|
|
||||||
|
|
@ -148,32 +150,33 @@ def test_woodshop_cmd_falls_back_to_module(monkeypatch):
|
||||||
assert cmd[1:] == ["-m", "woodshop"] and cmd[0] # python -m woodshop
|
assert cmd[1:] == ["-m", "woodshop"] and cmd[0] # python -m woodshop
|
||||||
|
|
||||||
|
|
||||||
def test_find_image_url():
|
def test_find_reference_url():
|
||||||
assert driver.find_image_url("build like this https://x.com/chair.jpg please") \
|
assert driver.find_reference_url("build like this https://x.com/chair.jpg please") \
|
||||||
== "https://x.com/chair.jpg"
|
== "https://x.com/chair.jpg"
|
||||||
assert driver.find_image_url("https://x.com/a.PNG") == "https://x.com/a.PNG"
|
assert driver.find_reference_url("see https://x.com/how-to") == "https://x.com/how-to"
|
||||||
assert driver.find_image_url("no image here http://x.com/page") is None
|
assert driver.find_reference_url("no url here") is None
|
||||||
|
|
||||||
|
|
||||||
def test_interpret_includes_image_directive(monkeypatch, tmp_path):
|
def test_interpret_includes_image_directive(monkeypatch, tmp_path):
|
||||||
captured = {}
|
captured = {}
|
||||||
|
monkeypatch.setattr(driver, "_run", lambda cmd, stdin="": captured.update(prompt=stdin) or "[]")
|
||||||
def fake_run(cmd, stdin=""):
|
|
||||||
captured["prompt"] = stdin
|
|
||||||
return "[]"
|
|
||||||
|
|
||||||
img = tmp_path / "ref.jpg"
|
img = tmp_path / "ref.jpg"
|
||||||
img.write_bytes(b"\xff\xd8\xff") # not a real jpeg, just a path
|
img.write_bytes(b"\xff\xd8\xff")
|
||||||
monkeypatch.setattr(driver, "_run", fake_run)
|
|
||||||
driver.interpret("build something like this", schemas="[]", scene_text="empty",
|
driver.interpret("build something like this", schemas="[]", scene_text="empty",
|
||||||
image_path=str(img))
|
image_path=str(img))
|
||||||
assert "REFERENCE PHOTO" in captured["prompt"]
|
assert "REFERENCE" in captured["prompt"] and str(img) in captured["prompt"]
|
||||||
assert str(img) in captured["prompt"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_fetch_image_writes_temp(monkeypatch):
|
def test_interpret_includes_reference_text(monkeypatch):
|
||||||
import io
|
captured = {}
|
||||||
|
monkeypatch.setattr(driver, "_run", lambda cmd, stdin="": captured.update(prompt=stdin) or "[]")
|
||||||
|
driver.interpret("build it", schemas="[]", scene_text="empty",
|
||||||
|
reference_text="Step 1: cut four legs 28 inches long.")
|
||||||
|
assert "build GUIDE" in captured["prompt"]
|
||||||
|
assert "cut four legs 28 inches" in captured["prompt"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_url_writes_temp(monkeypatch):
|
||||||
class FakeResp:
|
class FakeResp:
|
||||||
headers = {"Content-Type": "image/png"}
|
headers = {"Content-Type": "image/png"}
|
||||||
def __enter__(self): return self
|
def __enter__(self): return self
|
||||||
|
|
@ -181,9 +184,52 @@ def test_fetch_image_writes_temp(monkeypatch):
|
||||||
def read(self): return b"\x89PNG\r\n\x1a\n"
|
def read(self): return b"\x89PNG\r\n\x1a\n"
|
||||||
|
|
||||||
monkeypatch.setattr(driver.urllib.request, "urlopen", lambda *a, **k: FakeResp())
|
monkeypatch.setattr(driver.urllib.request, "urlopen", lambda *a, **k: FakeResp())
|
||||||
path = driver.fetch_image("https://x.com/chair.png")
|
path = driver.fetch_url("https://x.com/chair.png")
|
||||||
assert path.endswith(".png")
|
assert path.endswith(".png")
|
||||||
with open(path, "rb") as f:
|
|
||||||
assert f.read().startswith(b"\x89PNG")
|
|
||||||
import os as _os
|
import os as _os
|
||||||
_os.remove(path)
|
_os.remove(path)
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_web_text_strips_tags(monkeypatch):
|
||||||
|
html = b"<html><head><style>x{}</style></head><body><h1>Build</h1> a <b>shelf</b></body></html>"
|
||||||
|
|
||||||
|
class FakeResp:
|
||||||
|
def __enter__(self): return self
|
||||||
|
def __exit__(self, *a): return False
|
||||||
|
def read(self): return html
|
||||||
|
|
||||||
|
monkeypatch.setattr(driver.urllib.request, "urlopen", lambda *a, **k: FakeResp())
|
||||||
|
text = driver.fetch_web_text("https://x.com/guide")
|
||||||
|
assert "Build a shelf" in text and "<" not in text and "x{}" not in text
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_reference_routes_by_kind(monkeypatch, tmp_path):
|
||||||
|
# local image -> (path, None)
|
||||||
|
img = tmp_path / "a.png"; img.write_bytes(b"x")
|
||||||
|
assert driver.resolve_reference(str(img)) == (str(img), None)
|
||||||
|
# local pdf -> read directly (path, None)
|
||||||
|
pdf = tmp_path / "plan.pdf"; pdf.write_bytes(b"%PDF")
|
||||||
|
assert driver.resolve_reference(str(pdf)) == (str(pdf), None)
|
||||||
|
# web page URL -> (None, text)
|
||||||
|
monkeypatch.setattr(driver, "fetch_web_text", lambda u, **k: "guide text")
|
||||||
|
assert driver.resolve_reference("https://x.com/how-to-build") == (None, "guide text")
|
||||||
|
# 3D file -> render (mocked) -> (png, dims)
|
||||||
|
monkeypatch.setattr(driver, "render_mesh", lambda p: ("/tmp/r.png", "bbox 10x10x10"))
|
||||||
|
stl = tmp_path / "m.stl"; stl.write_bytes(b"solid")
|
||||||
|
assert driver.resolve_reference(str(stl)) == ("/tmp/r.png", "bbox 10x10x10")
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_mesh_real_if_possible(tmp_path):
|
||||||
|
"""Render an actual STL if pyvista + a working off-screen GL are available;
|
||||||
|
skip cleanly otherwise (headless boxes often lack GL)."""
|
||||||
|
pv = pytest.importorskip("pyvista")
|
||||||
|
stl = tmp_path / "box.stl"
|
||||||
|
try:
|
||||||
|
pv.Cube().save(str(stl))
|
||||||
|
png, dims = driver.render_mesh(str(stl))
|
||||||
|
except Exception as exc: # no GL / off-screen unsupported here
|
||||||
|
pytest.skip(f"offscreen render unavailable: {exc}")
|
||||||
|
import os as _os
|
||||||
|
assert _os.path.exists(png) and png.endswith(".png")
|
||||||
|
assert "bounding box" in dims
|
||||||
|
_os.remove(png)
|
||||||
|
|
|
||||||
|
|
@ -145,7 +145,7 @@ def test_run_command_threads_history(tmp_path, monkeypatch):
|
||||||
c = _controller(tmp_path)
|
c = _controller(tmp_path)
|
||||||
seen = {}
|
seen = {}
|
||||||
|
|
||||||
def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None):
|
def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None, reference_text=None):
|
||||||
seen["history"] = list(history or [])
|
seen["history"] = list(history or [])
|
||||||
return [{"tool": "say", "args": {"text": "want me to add tenons?"}}]
|
return [{"tool": "say", "args": {"text": "want me to add tenons?"}}]
|
||||||
|
|
||||||
|
|
@ -162,7 +162,7 @@ def test_run_command_forwards_image_path(tmp_path, monkeypatch):
|
||||||
c = _controller(tmp_path)
|
c = _controller(tmp_path)
|
||||||
seen = {}
|
seen = {}
|
||||||
|
|
||||||
def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None):
|
def fake_interpret(text, schemas, scene_text=None, history=None, image_path=None, reference_text=None):
|
||||||
seen["image_path"] = image_path
|
seen["image_path"] = image_path
|
||||||
return [{"tool": "say", "args": {"text": "ok"}}]
|
return [{"tool": "say", "args": {"text": "ok"}}]
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue