From 68ab329c33aaeee053573a904259fb9f2f36dce4 Mon Sep 17 00:00:00 2001
From: rob
Date: Sat, 17 Jan 2026 13:42:20 -0400
Subject: [PATCH] Add interactive step testing to Tool Builder

- New TestStepDialog for testing individual steps from the GUI
- Test button in Tool Builder to test selected step
- Auto-detects variables from step templates
- Multiple assertion types: not_empty, contains, valid_json,
  matches_regex, min/max_length, equals, valid_python
- Background execution with timing metrics
- Provider override option for testing with mock provider
- Output variable display and assertion pass/fail results

Completes M5: Testing & Polish milestone (100%)

Co-Authored-By: Claude Opus 4.5
---
 src/cmdforge/gui/dialogs/test_step_dialog.py | 540 +++++++++++++++++++
 src/cmdforge/gui/pages/tool_builder_page.py  |  21 +
 2 files changed, 561 insertions(+)
 create mode 100644 src/cmdforge/gui/dialogs/test_step_dialog.py

diff --git a/src/cmdforge/gui/dialogs/test_step_dialog.py b/src/cmdforge/gui/dialogs/test_step_dialog.py
new file mode 100644
index 0000000..cd34bf7
--- /dev/null
+++ b/src/cmdforge/gui/dialogs/test_step_dialog.py
@@ -0,0 +1,540 @@
+"""Test Step Dialog for interactive step testing."""
+
+import ast
+import html
+import json
+import re
+import time
+from typing import Optional, Union
+
+from PySide6.QtWidgets import (
+    QDialog, QVBoxLayout, QHBoxLayout, QFormLayout, QGroupBox,
+    QLabel, QLineEdit, QPlainTextEdit, QPushButton, QComboBox,
+    QTextEdit, QTableWidget, QTableWidgetItem, QHeaderView,
+    QSplitter, QWidget, QMessageBox
+)
+from PySide6.QtCore import Qt, QThread, Signal
+from PySide6.QtGui import QColor
+
+from ...tool import PromptStep, CodeStep, ToolStep
+from ...runner import execute_prompt_step, execute_code_step, execute_tool_step
+from ...providers import load_providers
+
+
+class StepTestWorker(QThread):
+    """Background worker that executes one step off the GUI thread.
+
+    The outcome is emitted through ``finished`` as a dict with the keys
+    ``success``, ``output``, ``output_vars``, ``error`` and ``elapsed_ms``.
+    """
+
+    # NOTE(review): this name hides QThread's built-in ``finished`` signal on
+    # this class; kept unchanged so existing connections keep working.
+    finished = Signal(dict)  # Emits result dict
+
+    def __init__(self, step: Union[PromptStep, CodeStep, ToolStep], variables: dict,
+                 provider_override: Optional[str] = None):
+        super().__init__()
+        self.step = step
+        self.variables = variables
+        self.provider_override = provider_override
+
+    def run(self):
+        """Execute the step and emit the result dict via ``finished``."""
+        result = {
+            "success": False,
+            "output": "",
+            "output_vars": {},
+            "error": None,
+            "elapsed_ms": 0
+        }
+
+        # Monotonic clock: the measured duration is unaffected by wall-clock changes.
+        start_time = time.perf_counter()
+
+        try:
+            if isinstance(self.step, PromptStep):
+                output, success = execute_prompt_step(
+                    self.step, self.variables, self.provider_override
+                )
+                result["success"] = success
+                result["output"] = output
+                result["output_vars"] = {self.step.output_var: output}
+                if not success:
+                    result["error"] = "Provider call failed"
+
+            elif isinstance(self.step, CodeStep):
+                outputs, success = execute_code_step(
+                    self.step, self.variables, step_num=1
+                )
+                result["success"] = success
+                result["output_vars"] = outputs
+                result["output"] = "\n".join(f"{k} = {v}" for k, v in outputs.items())
+                if not success:
+                    result["error"] = "Code execution failed"
+
+            elif isinstance(self.step, ToolStep):
+                output, success = execute_tool_step(
+                    self.step, self.variables,
+                    depth=0,
+                    provider_override=self.provider_override,
+                    dry_run=False,
+                    verbose=False
+                )
+                result["success"] = success
+                result["output"] = output
+                result["output_vars"] = {self.step.output_var: output}
+                if not success:
+                    result["error"] = f"Tool '{self.step.tool}' execution failed"
+
+        except Exception as e:
+            # Worker boundary: report any failure to the dialog instead of
+            # letting the exception escape the thread.
+            result["success"] = False
+            result["error"] = str(e) or type(e).__name__
+
+        result["elapsed_ms"] = int((time.perf_counter() - start_time) * 1000)
+        self.finished.emit(result)
+
+
+class TestStepDialog(QDialog):
+    """Dialog for interactively testing a single step."""
+
+    # Assertion types available: (type id, display name, tooltip)
+    ASSERTION_TYPES = [
+        ("not_empty", "Not Empty", "Output must not be empty"),
+        ("contains", "Contains", "Output must contain the specified text"),
+        ("not_contains", "Does Not Contain", "Output must NOT contain the specified text"),
+        ("equals", "Equals", "Output must exactly equal the expected value"),
+        ("valid_json", "Valid JSON", "Output must be valid JSON"),
+        ("valid_python", "Valid Python", "Output must be valid Python syntax"),
+        ("matches_regex", "Matches Regex", "Output must match the regular expression"),
+        ("min_length", "Min Length", "Output must be at least N characters"),
+        ("max_length", "Max Length", "Output must be at most N characters"),
+    ]
+
+    def __init__(self, parent, step: Union[PromptStep, CodeStep, ToolStep],
+                 available_vars: Optional[list] = None):
+        """Build the dialog for ``step``.
+
+        ``available_vars`` lists variable names produced by earlier steps;
+        it defaults to ``["input"]`` when omitted or empty.
+        """
+        super().__init__(parent)
+        self.step = step
+        self.available_vars = available_vars or ["input"]
+        self._worker = None
+
+        # Determine step type for title
+        if isinstance(step, PromptStep):
+            step_type = "Prompt"
+        elif isinstance(step, CodeStep):
+            step_type = "Code"
+        elif isinstance(step, ToolStep):
+            step_type = f"Tool ({step.tool})"
+        else:
+            step_type = "Unknown"
+
+        step_name = step.name if step.name else step_type
+        self.setWindowTitle(f"Test Step: {step_name}")
+        self.setMinimumSize(800, 700)
+
+        self._setup_ui()
+        self._detect_variables()
+
+    def _setup_ui(self):
+        """Set up the dialog UI."""
+        layout = QVBoxLayout(self)
+        layout.setSpacing(12)
+
+        # Main splitter: top (inputs) | bottom (output)
+        splitter = QSplitter(Qt.Vertical)
+
+        # Top section: Variables and Assertions
+        top_widget = QWidget()
+        top_layout = QHBoxLayout(top_widget)
+        top_layout.setContentsMargins(0, 0, 0, 0)
+
+        # Left: Variables input
+        vars_group = QGroupBox("Input Variables")
+        vars_layout = QVBoxLayout(vars_group)
+
+        vars_help = QLabel("Provide test values for variables used in this step:")
+        vars_help.setStyleSheet("color: #718096; font-size: 11px;")
+        vars_layout.addWidget(vars_help)
+
+        # Variables form
+        self.vars_form = QFormLayout()
+        self.vars_form.setSpacing(8)
+        self.var_inputs = {}  # variable name -> QLineEdit or QPlainTextEdit
+        vars_layout.addLayout(self.vars_form)
+        vars_layout.addStretch()
+
+        top_layout.addWidget(vars_group, 1)
+
+        # Right: Assertions
+        assert_group = QGroupBox("Assertions (Optional)")
+        assert_layout = QVBoxLayout(assert_group)
+
+        assert_help = QLabel("Define checks to validate the step output:")
+        assert_help.setStyleSheet("color: #718096; font-size: 11px;")
+        assert_layout.addWidget(assert_help)
+
+        # Assertions table
+        self.assertions_table = QTableWidget(0, 3)
+        self.assertions_table.setHorizontalHeaderLabels(["Type", "Value", ""])
+        self.assertions_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeToContents)
+        self.assertions_table.horizontalHeader().setSectionResizeMode(1, QHeaderView.Stretch)
+        self.assertions_table.horizontalHeader().setSectionResizeMode(2, QHeaderView.ResizeToContents)
+        self.assertions_table.verticalHeader().setVisible(False)
+        self.assertions_table.setMaximumHeight(150)
+        assert_layout.addWidget(self.assertions_table)
+
+        # Add assertion button
+        btn_add_assertion = QPushButton("+ Add Assertion")
+        btn_add_assertion.clicked.connect(self._add_assertion_row)
+        assert_layout.addWidget(btn_add_assertion)
+
+        top_layout.addWidget(assert_group, 1)
+
+        splitter.addWidget(top_widget)
+
+        # Bottom section: Controls and Output
+        bottom_widget = QWidget()
+        bottom_layout = QVBoxLayout(bottom_widget)
+        bottom_layout.setContentsMargins(0, 0, 0, 0)
+
+        # Controls row
+        controls_layout = QHBoxLayout()
+
+        # Provider override (for prompt and tool steps)
+        if isinstance(self.step, (PromptStep, ToolStep)):
+            controls_layout.addWidget(QLabel("Provider:"))
+            self.provider_combo = QComboBox()
+            self.provider_combo.addItem("(use step's default)")
+            providers = load_providers()
+            for provider in sorted(providers, key=lambda p: p.name):
+                self.provider_combo.addItem(provider.name)
+            # Add common defaults
+            for default in ["mock"]:
+                if self.provider_combo.findText(default) < 0:
+                    self.provider_combo.addItem(default)
+            self.provider_combo.setMinimumWidth(150)
+            controls_layout.addWidget(self.provider_combo)
+        else:
+            self.provider_combo = None
+
+        controls_layout.addStretch()
+
+        # Run button
+        self.btn_run = QPushButton("Run Step")
+        self.btn_run.setMinimumHeight(36)
+        self.btn_run.setMinimumWidth(120)
+        self.btn_run.clicked.connect(self._run_test)
+        controls_layout.addWidget(self.btn_run)
+
+        bottom_layout.addLayout(controls_layout)
+
+        # Output section
+        output_group = QGroupBox("Output")
+        output_layout = QVBoxLayout(output_group)
+
+        # Status line
+        self.status_label = QLabel("Click 'Run Step' to test this step")
+        self.status_label.setStyleSheet("color: #718096;")
+        output_layout.addWidget(self.status_label)
+
+        # Output display
+        self.output_display = QTextEdit()
+        self.output_display.setReadOnly(True)
+        self.output_display.setPlaceholderText("Step output will appear here...")
+        font = self.output_display.font()
+        # setFamily() takes a single family name; the fallback list has to
+        # go through setFamilies() for each entry to be tried in order.
+        font.setFamilies(["Consolas", "Monaco", "monospace"])
+        self.output_display.setFont(font)
+        output_layout.addWidget(self.output_display)
+
+        # Assertion results
+        self.assertion_results = QLabel("")
+        # Results are rendered as markup, so do not rely on auto-detection.
+        self.assertion_results.setTextFormat(Qt.RichText)
+        self.assertion_results.setWordWrap(True)
+        output_layout.addWidget(self.assertion_results)
+
+        bottom_layout.addWidget(output_group)
+
+        splitter.addWidget(bottom_widget)
+        splitter.setSizes([300, 400])
+
+        layout.addWidget(splitter)
+
+        # Dialog buttons
+        buttons_layout = QHBoxLayout()
+        buttons_layout.addStretch()
+
+        btn_close = QPushButton("Close")
+        btn_close.clicked.connect(self.accept)
+        buttons_layout.addWidget(btn_close)
+
+        layout.addLayout(buttons_layout)
+
+    def _detect_variables(self):
+        """Detect variables used in the step and create input fields."""
+        # Get template text based on step type
+        template = ""
+        if isinstance(self.step, PromptStep):
+            template = self.step.prompt or ""
+        elif isinstance(self.step, CodeStep):
+            template = self.step.code or ""
+        elif isinstance(self.step, ToolStep):
+            template = self.step.input_template or ""
+            # Also add args values
+            for value in (self.step.args or {}).values():
+                template += " " + str(value)
+
+        # Drop escaped braces ("{{" / "}}") first so that only real
+        # {variable} placeholders remain for the pattern below.
+        unescaped = template.replace("{{", "").replace("}}", "")
+        var_pattern = r'\{([a-zA-Z_][a-zA-Z0-9_]*)\}'
+        found_vars = set(re.findall(var_pattern, unescaped))
+
+        # Combine with available_vars (from previous steps)
+        all_vars = sorted(set(self.available_vars) | found_vars)
+
+        # Create input fields for each variable
+        for var_name in all_vars:
+            if var_name == "input":
+                # Use multiline for input
+                widget = QPlainTextEdit()
+                widget.setPlaceholderText("Enter test input text...")
+                widget.setMaximumHeight(80)
+            else:
+                widget = QLineEdit()
+                widget.setPlaceholderText(f"Value for {{{var_name}}}")
+
+            self.var_inputs[var_name] = widget
+            self.vars_form.addRow(f"{{{var_name}}}:", widget)
+
+    def _add_assertion_row(self):
+        """Add a new assertion row to the table."""
+        row = self.assertions_table.rowCount()
+        self.assertions_table.insertRow(row)
+
+        # Type dropdown
+        type_combo = QComboBox()
+        for type_id, display_name, tooltip in self.ASSERTION_TYPES:
+            type_combo.addItem(display_name, type_id)
+            idx = type_combo.count() - 1
+            type_combo.setItemData(idx, tooltip, Qt.ToolTipRole)
+        self.assertions_table.setCellWidget(row, 0, type_combo)
+
+        # Value input
+        value_edit = QLineEdit()
+        value_edit.setPlaceholderText("Expected value (if applicable)")
+        self.assertions_table.setCellWidget(row, 1, value_edit)
+
+        # Remove button
+        btn_remove = QPushButton("×")
+        btn_remove.setFixedWidth(30)
+        # Bind the row as a default argument; _remove_assertion_row rebinds
+        # the remaining buttons after every removal.
+        btn_remove.clicked.connect(
+            lambda checked=False, r=row: self._remove_assertion_row(r)
+        )
+        self.assertions_table.setCellWidget(row, 2, btn_remove)
+
+    def _remove_assertion_row(self, row: int):
+        """Remove an assertion row and renumber the remaining remove buttons."""
+        self.assertions_table.removeRow(row)
+        # Update remove button connections for remaining rows
+        for i in range(self.assertions_table.rowCount()):
+            btn = self.assertions_table.cellWidget(i, 2)
+            if btn:
+                btn.clicked.disconnect()
+                btn.clicked.connect(lambda checked=False, r=i: self._remove_assertion_row(r))
+
+    def _get_assertions(self) -> list:
+        """Get list of assertions from the table."""
+        assertions = []
+        for row in range(self.assertions_table.rowCount()):
+            type_combo = self.assertions_table.cellWidget(row, 0)
+            value_edit = self.assertions_table.cellWidget(row, 1)
+            if type_combo:
+                assertions.append({
+                    "type": type_combo.currentData(),
+                    "display": type_combo.currentText(),
+                    "value": value_edit.text() if value_edit else ""
+                })
+        return assertions
+
+    def _run_test(self):
+        """Run the step test."""
+        # Collect variable values
+        variables = {}
+        for var_name, widget in self.var_inputs.items():
+            if isinstance(widget, QPlainTextEdit):
+                variables[var_name] = widget.toPlainText()
+            else:
+                variables[var_name] = widget.text()
+
+        # Get provider override
+        provider_override = None
+        if self.provider_combo is not None and self.provider_combo.currentIndex() > 0:
+            provider_override = self.provider_combo.currentText()
+
+        # Disable run button and show loading
+        self.btn_run.setEnabled(False)
+        self.btn_run.setText("Running...")
+        self.status_label.setText("Executing step...")
+        self.status_label.setStyleSheet("color: #718096;")
+        self.output_display.clear()
+        self.assertion_results.clear()
+
+        # Start worker thread
+        self._worker = StepTestWorker(self.step, variables, provider_override)
+        self._worker.finished.connect(self._on_test_finished)
+        self._worker.start()
+
+    def _on_test_finished(self, result: dict):
+        """Handle test completion: update status, output and assertions."""
+        self.btn_run.setEnabled(True)
+        self.btn_run.setText("Run Step")
+
+        # Steps may produce non-string values; normalise once for display.
+        raw_output = result["output"]
+        output_text = "" if raw_output is None else str(raw_output)
+
+        # Display result
+        if result["success"]:
+            self.status_label.setText(f"✓ Step completed in {result['elapsed_ms']}ms")
+            self.status_label.setStyleSheet("color: #38a169; font-weight: bold;")
+
+            # Show output
+            display_text = output_text
+            if result["output_vars"]:
+                display_text += "\n\n--- Output Variables ---\n"
+                for var, value in result["output_vars"].items():
+                    # str() first: len()/slicing would fail on non-string values.
+                    value_text = str(value)
+                    preview = value_text[:200] + "..." if len(value_text) > 200 else value_text
+                    display_text += f"{var} = {preview}\n"
+            self.output_display.setPlainText(display_text)
+        else:
+            self.status_label.setText(f"✗ Step failed ({result['elapsed_ms']}ms)")
+            self.status_label.setStyleSheet("color: #e53e3e; font-weight: bold;")
+
+            # "error" is always present but may be None, so .get()'s default
+            # alone would never produce the fallback text.
+            error_text = str(result.get("error") or "Unknown error")
+            # Escape before setHtml() so "<" or "&" in the message is shown
+            # literally instead of being parsed as markup.
+            error_html = html.escape(error_text).replace("\n", "<br>")
+            self.output_display.setHtml(f"Error: {error_html}")
+
+        # Run assertions
+        assertions = self._get_assertions()
+        if assertions and result["success"]:
+            self._run_assertions(output_text, assertions)
+
+    def _run_assertions(self, output: str, assertions: list):
+        """Run assertions against the output and display the results."""
+        results = []
+        all_passed = True
+
+        for assertion in assertions:
+            a_type = assertion["type"]
+            a_value = assertion["value"]
+            a_display = assertion["display"]
+            passed = False
+            message = ""
+
+            try:
+                if a_type == "not_empty":
+                    passed = bool(output.strip())
+                    message = "Output is not empty" if passed else "Output is empty"
+
+                elif a_type == "contains":
+                    passed = a_value in output
+                    message = f"Output contains '{a_value}'" if passed else f"Output does not contain '{a_value}'"
+
+                elif a_type == "not_contains":
+                    passed = a_value not in output
+                    message = f"Output does not contain '{a_value}'" if passed else f"Output contains '{a_value}'"
+
+                elif a_type == "equals":
+                    passed = output.strip() == a_value.strip()
+                    message = "Output equals expected" if passed else "Output does not equal expected"
+
+                elif a_type == "valid_json":
+                    try:
+                        json.loads(output)
+                        passed = True
+                        message = "Output is valid JSON"
+                    except json.JSONDecodeError as e:
+                        passed = False
+                        message = f"Invalid JSON: {e}"
+
+                elif a_type == "valid_python":
+                    try:
+                        ast.parse(output)
+                        passed = True
+                        message = "Output is valid Python"
+                    except SyntaxError as e:
+                        passed = False
+                        message = f"Invalid Python: {e}"
+
+                elif a_type == "matches_regex":
+                    try:
+                        passed = bool(re.search(a_value, output))
+                        message = "Output matches regex" if passed else "Output does not match regex"
+                    except re.error as e:
+                        passed = False
+                        message = f"Invalid regex: {e}"
+
+                elif a_type == "min_length":
+                    try:
+                        min_len = int(a_value)
+                        passed = len(output) >= min_len
+                        message = f"Length {len(output)} >= {min_len}" if passed else f"Length {len(output)} < {min_len}"
+                    except ValueError:
+                        passed = False
+                        message = "Invalid minimum length value"
+
+                elif a_type == "max_length":
+                    try:
+                        max_len = int(a_value)
+                        passed = len(output) <= max_len
+                        message = f"Length {len(output)} <= {max_len}" if passed else f"Length {len(output)} > {max_len}"
+                    except ValueError:
+                        passed = False
+                        message = "Invalid maximum length value"
+
+            except Exception as e:
+                passed = False
+                message = f"Error: {e}"
+
+            if not passed:
+                all_passed = False
+
+            results.append((a_display, passed, message))
+
+        # Display results. Messages can contain user-entered text, so they
+        # are escaped before being placed into the rich-text label.
+        lines = []
+        for display, passed, message in results:
+            icon = "✓" if passed else "✗"
+            color = "#38a169" if passed else "#e53e3e"
+            lines.append(
+                f"<span style='color: {color};'>"
+                f"{icon} {html.escape(display)}: {html.escape(message)}</span><br>"
+            )
+
+        if all_passed:
+            summary = f"All {len(results)} assertion(s) passed!<br>"
+        else:
+            failed_count = sum(1 for _, p, _ in results if not p)
+            summary = f"{failed_count} of {len(results)} assertion(s) failed<br>"
+
+        result_html = summary + "Assertion Results:<br>" + "".join(lines)
+        self.assertion_results.setText(result_html)
diff --git a/src/cmdforge/gui/pages/tool_builder_page.py b/src/cmdforge/gui/pages/tool_builder_page.py
index f8ee2e8..145ec9c 100644
--- a/src/cmdforge/gui/pages/tool_builder_page.py
+++ b/src/cmdforge/gui/pages/tool_builder_page.py
@@ -233,6 +233,12 @@ class ToolBuilderPage(QWidget):
         self.btn_edit_step.clicked.connect(self._edit_step)
         steps_btns.addWidget(self.btn_edit_step)
 
+        self.btn_test_step = QPushButton("Test")
+        self.btn_test_step.setObjectName("secondary")
+        self.btn_test_step.setToolTip("Test the selected step with custom input")
+        self.btn_test_step.clicked.connect(self._test_step)
+        steps_btns.addWidget(self.btn_test_step)
+
         self.btn_del_step = QPushButton("Delete")
         self.btn_del_step.setObjectName("danger")
         self.btn_del_step.setToolTip("Delete the selected step")
@@ -691,6 +697,21 @@ class ToolBuilderPage(QWidget):
                 self._add_tool_dependency(new_step.tool)
             self._refresh_steps()
 
+    def _test_step(self):
+        """Test the selected step with custom input."""
+        items = self.steps_list.selectedItems()
+        if not items:
+            QMessageBox.information(self, "Test Step", "Please select a step to test")
+            return
+
+        step = items[0].data(Qt.UserRole)
+        idx = self.steps_list.row(items[0])
+        available_vars = self._get_available_vars(up_to_step=idx)
+
+        from ..dialogs.test_step_dialog import TestStepDialog
+        dialog = TestStepDialog(self, step, available_vars=available_vars)
+        dialog.exec()
+
     def _delete_step(self):
         """Delete selected step."""
         items = self.steps_list.selectedItems()