orchestrated-discussions/.venv/lib/python3.12/site-packages/artifact_editor/parser.py

299 lines
9.5 KiB
Python

"""Parser for extracting elements from PlantUML code."""
import re
from typing import List, Tuple, Optional
from dataclasses import dataclass
from .dialogs import DiagramElement, DiagramRelationship
def detect_diagram_type(code: str) -> str:
    """Guess the PlantUML diagram type from its source text.

    Explicit ``@startmindmap`` / ``@startwbs`` / ``@startgantt`` directives
    are checked first (case-insensitively). Otherwise the first matching
    heuristic wins, in this order: sequence, component, usecase, state, er,
    class.

    Args:
        code: PlantUML source text.

    Returns:
        One of ``'mindmap'``, ``'wbs'``, ``'gantt'``, ``'sequence'``,
        ``'component'``, ``'usecase'``, ``'state'``, ``'er'`` or ``'class'``.
        ``'class'`` is also the fallback when nothing matches.
    """
    code_lower = code.lower()
    if '@startmindmap' in code_lower:
        return 'mindmap'
    if '@startwbs' in code_lower:
        return 'wbs'
    if '@startgantt' in code_lower:
        return 'gantt'

    # Check for diagram-specific keywords.
    # NOTE: any arrow followed by a ':' label on the same line counts as a
    # sequence message, so labelled class relations also land here.
    if re.search(r'\bparticipant\b|\bactor\b.*->|->.*:', code, re.IGNORECASE):
        return 'sequence'
    # The (?<!\w) guards keep the "[Name]" / "(Name)" shorthands from firing
    # on index or call syntax such as "cells[N]" or "getName(id)" inside a
    # class body, which would otherwise misreport a class diagram.
    if re.search(r'\bcomponent\b|(?<!\w)\[\w+\]', code):
        return 'component'
    if re.search(r'\busecase\b|(?<!\w)\(\w+\)', code):
        return 'usecase'
    if re.search(r'\bstate\b|\[\*\]', code):
        return 'state'
    if re.search(r'\bentity\b.*\{|\|\|--|--\|\|', code):
        return 'er'
    return 'class'  # Default (also covers class/interface/enum keywords)
def parse_elements(code: str) -> List[DiagramElement]:
    """Extract diagram elements from PlantUML code.

    The source is scanned in several passes, in this order: class-like
    declarations, sequence participants, bracketed components, parenthesised
    use cases, states, and package-style containers. Class-like elements are
    always emitted; every later pass skips a name that has already been
    collected.

    A name is either a double-quoted string or a single bare token. A bare
    token stops at whitespace, ``"``, ``<``, ``{`` or ``:``, so a trailing
    ``as Alias``, stereotype, opening brace or description is not absorbed
    into the name.

    Args:
        code: PlantUML source text.

    Returns:
        The collected ``DiagramElement`` objects in discovery order.
    """
    elements: List[DiagramElement] = []
    seen_names = set()

    def add_unique(name: str, **fields) -> None:
        """Append an element unless one with the same name already exists."""
        if name in seen_names:
            return
        seen_names.add(name)
        elements.append(DiagramElement(name=name, **fields))

    # Quoted name (group A) or single bare token (group B).
    name_token = r'(?:"([^"\n]+)"|([^\s"<{:]+))'
    alias_token = r'(?:[ \t]+as[ \t]+(\w+))?'

    # Parse class-like elements: class Name { ... }
    # "abstract\s+class" must precede plain "abstract" in the alternation,
    # otherwise "class" would be captured as the element name.
    class_pattern = (
        r'\b(class|interface|enum|abstract\s+class|abstract)\s+(\w+)'
        r'(?:\s*<<(\w+)>>)?(?:\s*\{([^}]*)\})?'
    )
    for match in re.finditer(class_pattern, code):
        kind = match.group(1)
        # Normalise "abstract class" regardless of the whitespace between
        # the two words.
        element_type = 'abstract' if kind.startswith('abstract') else kind
        name = match.group(2)
        attributes = []
        methods = []
        for member in (match.group(4) or "").split('\n'):
            member = member.strip()
            if not member or member == '--':
                continue
            # A member containing "(" is treated as a method.
            (methods if '(' in member else attributes).append(member)
        seen_names.add(name)
        elements.append(DiagramElement(
            element_type=element_type,
            name=name,
            stereotype=match.group(3) or "",
            attributes=attributes,
            methods=methods,
        ))

    # Parse sequence diagram participants. Because "actor" is one of the
    # kinds here and names are tokenised properly, no separate actor pass is
    # needed (it used to add a second, truncated element for quoted names).
    participant_pattern = (
        r'\b(participant|actor|boundary|control|entity|database|collections|queue)[ \t]+'
        + name_token + alias_token + r'(?:[ \t]*<<(\w+)>>)?'
    )
    for match in re.finditer(participant_pattern, code):
        add_unique(
            (match.group(2) or match.group(3)).strip(),
            element_type=match.group(1),
            alias=match.group(4) or "",
            stereotype=match.group(5) or "",
        )

    # Parse component diagram components: component [Name] as Alias
    component_pattern = r'\bcomponent\s*\[([^\]]+)\](?:\s+as\s+(\w+))?'
    for match in re.finditer(component_pattern, code):
        add_unique(
            match.group(1),
            element_type='component',
            alias=match.group(2) or "",
        )

    # Parse use case elements: usecase (Name) as Alias
    usecase_pattern = r'\busecase\s*\(([^)]+)\)(?:\s+as\s+(\w+))?'
    for match in re.finditer(usecase_pattern, code):
        add_unique(
            match.group(1),
            element_type='usecase',
            alias=match.group(2) or "",
        )

    # Parse state elements
    state_pattern = r'\bstate[ \t]+' + name_token + alias_token
    for match in re.finditer(state_pattern, code):
        add_unique(
            (match.group(1) or match.group(2)).strip(),
            element_type='state',
            alias=match.group(3) or "",
        )

    # Parse package/namespace containers
    package_pattern = (
        r'\b(package|namespace|folder|frame|cloud|node|rectangle)[ \t]+'
        + name_token
    )
    for match in re.finditer(package_pattern, code):
        add_unique(
            (match.group(2) or match.group(3)).strip(),
            element_type=match.group(1),
        )

    return elements
def parse_relationships(code: str) -> List[DiagramRelationship]:
    """Extract relationships from PlantUML code.

    Each non-empty line that is not a comment (``'``) or directive (``@``)
    is matched against ``<from> <connector> <to> [: label]``. Lines whose
    source or target is a declaration keyword (``class``, ``state``, ...)
    are skipped.

    Args:
        code: PlantUML source text.

    Returns:
        One ``DiagramRelationship`` per matching line, in source order.
    """
    # Matches: A --> B, A --|> B, A ..> B : label, A o-- B, etc.
    # The lookahead demands at least one line character ("-" or ".") in the
    # connector. Without it the letter "o" (the aggregation decoration)
    # counts as a connector on its own, so plain words such as "autonumber"
    # or "Room" are split into a bogus relationship.
    rel_pattern = re.compile(
        r'(\w+)\s*((?=[<>|*o]*[-.])[-.<>|*o]+)\s*(\w+)(?:\s*:\s*(.+))?$'
    )
    # Keywords that aren't elements
    keywords = {'class', 'interface', 'enum', 'abstract', 'package', 'component',
                'participant', 'actor', 'state', 'entity', 'usecase'}

    relationships = []
    for raw_line in code.split('\n'):
        line = raw_line.strip()
        # Skip blanks, comments and directives
        if not line or line.startswith(("'", '@')):
            continue
        match = rel_pattern.match(line)
        if not match:
            continue
        from_el, rel_type, to_el = match.group(1), match.group(2), match.group(3)
        if from_el.lower() in keywords or to_el.lower() in keywords:
            continue
        relationships.append(DiagramRelationship(
            from_element=from_el,
            to_element=to_el,
            relationship_type=rel_type,
            label=(match.group(4) or "").strip(),
        ))
    return relationships
def get_element_names(code: str) -> List[str]:
    """Return every element name in the diagram, each followed by its alias.

    Elements come from ``parse_elements``; an alias is included only when
    it is non-empty.
    """
    collected: List[str] = []
    for element in parse_elements(code):
        if element.alias:
            collected.extend([element.name, element.alias])
        else:
            collected.append(element.name)
    return collected
def find_element_in_code(code: str, element_name: str) -> Optional[Tuple[int, int]]:
    """Find the line range of an element definition in code.

    The first line that declares ``element_name`` is located. A class-like
    declaration is ``class``/``interface``/``enum``/``abstract`` followed by
    the name. Other declarations are a participant-style keyword,
    ``component``, ``usecase`` or ``state`` followed by the name written
    bare, double-quoted, as ``[name]`` or as ``(name)``. The name must match
    in full: looking up ``A`` does not hit ``participant Alice``.

    If the declaration line opens more braces than it closes, the range
    extends to the line where the braces balance again.

    Args:
        code: PlantUML source text.
        element_name: Element name to look for (matched literally).

    Returns:
        ``(start_line, end_line)`` as 0-based, inclusive indices into
        ``code.split('\\n')``, or ``None`` if no declaration is found. When
        an opened body is never closed, only the declaration line is
        returned.
    """
    name = re.escape(element_name)
    # (?!\w) instead of \b so names ending in punctuation can still match.
    class_re = re.compile(rf'\b(?:class|interface|enum|abstract)\s+{name}(?!\w)')
    other_re = re.compile(
        r'\b(?:participant|actor|boundary|control|entity|database|collections'
        r'|queue|component|usecase|state)'
        rf'(?:\s+"{name}"|\s*\[{name}\]|\s*\({name}\)|\s+{name}(?!\w))'
    )

    lines = code.split('\n')
    for start, line in enumerate(lines):
        if not (class_re.search(line) or other_re.search(line)):
            continue
        depth = line.count('{') - line.count('}')
        if depth <= 0:
            # No body, or a body opened and closed on the same line.
            return (start, start)
        # Follow the body until its braces balance.
        for end in range(start + 1, len(lines)):
            depth += lines[end].count('{') - lines[end].count('}')
            if depth <= 0:
                return (start, end)
        # Unterminated body: fall back to the declaration line alone.
        return (start, start)
    return None
def insert_element_code(code: str, element_code: str) -> str:
    """Insert element code before the diagram's closing directive.

    The directives ``@enduml``, ``@endmindmap``, ``@endwbs`` and
    ``@endgantt`` are tried in that order, ignoring ASCII case. For the
    first kind that occurs, ``element_code`` followed by a blank line is
    inserted immediately before its last occurrence. If none occurs,
    ``element_code`` is appended on a new line.

    Args:
        code: PlantUML source text.
        element_code: Text of the element to insert.

    Returns:
        The updated source text.
    """
    end_directives = ('@enduml', '@endmindmap', '@endwbs', '@endgantt')
    for directive in end_directives:
        # Search the original string rather than code.lower(): lowering can
        # change the string's length (e.g. U+0130 becomes two code points),
        # which would shift the index and split the directive.
        matches = list(re.finditer(re.escape(directive), code,
                                   re.IGNORECASE | re.ASCII))
        if matches:
            idx = matches[-1].start()
            # Insert before the end directive with proper spacing
            return code[:idx] + element_code + '\n\n' + code[idx:]
    # If no end directive, append
    return code + '\n' + element_code
def replace_element_in_code(code: str, old_name: str, new_code: str) -> str:
    """Swap the definition of ``old_name`` for ``new_code``.

    The line range comes from ``find_element_in_code``. When the element
    is not found, ``code`` is returned unchanged.
    """
    span = find_element_in_code(code, old_name)
    if not span:
        return code
    first, last = span
    source_lines = code.split('\n')
    # Replace the whole inclusive range with the new text as one entry.
    source_lines[first:last + 1] = [new_code]
    return '\n'.join(source_lines)
def delete_element_from_code(code: str, element_name: str) -> str:
    """Delete an element and its relationships from code.

    The element's definition lines, as located by ``find_element_in_code``,
    are removed. Every remaining line in which ``element_name`` sits next to
    a relationship connector is then dropped as well. If the element is not
    found, ``code`` is returned unchanged.

    Args:
        code: PlantUML source text.
        element_name: Name of the element to remove (matched literally).

    Returns:
        The updated source text.
    """
    location = find_element_in_code(code, element_name)
    if not location:
        return code

    lines = code.split('\n')
    start, end = location
    # Remove element lines
    del lines[start:end + 1]

    # A connector must contain a line character ("-" or "."). Decorations
    # such as "o", "*", "<", ">" and "|" alone are not enough; otherwise a
    # word ending in "o" before the name ("... : hello Foo") would get an
    # unrelated line deleted.
    name = re.escape(element_name)
    outgoing = re.compile(rf'(?<!\w){name}(?!\w)\s*[<>|*o]*[-.]')
    incoming = re.compile(rf'[-.][-.<>|*o]*\s*(?<!\w){name}(?!\w)')

    # Remove relationships involving this element
    kept = [
        line for line in lines
        if not (outgoing.search(line) or incoming.search(line))
    ]
    return '\n'.join(kept)