"""Parser for extracting elements from PlantUML code.""" import re from typing import List, Tuple, Optional from dataclasses import dataclass from .dialogs import DiagramElement, DiagramRelationship def detect_diagram_type(code: str) -> str: """Detect the diagram type from PlantUML code.""" code_lower = code.lower() if '@startmindmap' in code_lower: return 'mindmap' if '@startwbs' in code_lower: return 'wbs' if '@startgantt' in code_lower: return 'gantt' # Check for diagram-specific keywords if re.search(r'\bparticipant\b|\bactor\b.*->|->.*:', code, re.IGNORECASE): return 'sequence' if re.search(r'\bcomponent\b|\[\w+\]', code): return 'component' if re.search(r'\busecase\b|\(\w+\)', code): return 'usecase' if re.search(r'\bstate\b|\[\*\]', code): return 'state' if re.search(r'\bentity\b.*\{|\|\|--|--\|\|', code): return 'er' if re.search(r'\bclass\b|\binterface\b|\benum\b', code): return 'class' return 'class' # Default def parse_elements(code: str) -> List[DiagramElement]: """Extract diagram elements from PlantUML code.""" elements = [] # Parse class-like elements: class Name { ... } class_pattern = r'(class|interface|enum|abstract\s+class|abstract)\s+(\w+)(?:\s*<<(\w+)>>)?(?:\s*\{([^}]*)\})?' for match in re.finditer(class_pattern, code, re.MULTILINE | re.DOTALL): element_type = match.group(1).replace('abstract class', 'abstract').strip() name = match.group(2) stereotype = match.group(3) or "" body = match.group(4) or "" attributes = [] methods = [] if body: for line in body.strip().split('\n'): line = line.strip() if not line or line == '--': continue if '(' in line: methods.append(line) else: attributes.append(line) elements.append(DiagramElement( element_type=element_type, name=name, stereotype=stereotype, attributes=attributes, methods=methods, )) # Parse sequence diagram participants participant_pattern = r'(participant|actor|boundary|control|entity|database|collections|queue)\s+"?([^"\n]+)"?(?:\s+as\s+(\w+))?(?:\s*<<(\w+)>>)?' for match in re.finditer(participant_pattern, code, re.MULTILINE): element_type = match.group(1) name = match.group(2).strip() alias = match.group(3) or "" stereotype = match.group(4) or "" # Avoid duplicates if not any(e.name == name for e in elements): elements.append(DiagramElement( element_type=element_type, name=name, alias=alias, stereotype=stereotype, )) # Parse component diagram components component_pattern = r'component\s*\[([^\]]+)\](?:\s+as\s+(\w+))?' for match in re.finditer(component_pattern, code): name = match.group(1) alias = match.group(2) or "" if not any(e.name == name for e in elements): elements.append(DiagramElement( element_type='component', name=name, alias=alias, )) # Parse use case elements usecase_pattern = r'usecase\s*\(([^)]+)\)(?:\s+as\s+(\w+))?' for match in re.finditer(usecase_pattern, code): name = match.group(1) alias = match.group(2) or "" if not any(e.name == name for e in elements): elements.append(DiagramElement( element_type='usecase', name=name, alias=alias, )) # Parse actor elements (standalone) actor_pattern = r'actor\s+"?(\w+)"?(?:\s+as\s+(\w+))?' for match in re.finditer(actor_pattern, code): name = match.group(1) alias = match.group(2) or "" if not any(e.name == name for e in elements): elements.append(DiagramElement( element_type='actor', name=name, alias=alias, )) # Parse state elements state_pattern = r'state\s+"?([^"\n{]+)"?(?:\s+as\s+(\w+))?' for match in re.finditer(state_pattern, code): name = match.group(1).strip() alias = match.group(2) or "" if not any(e.name == name for e in elements): elements.append(DiagramElement( element_type='state', name=name, alias=alias, )) # Parse package/namespace containers package_pattern = r'(package|namespace|folder|frame|cloud|node|rectangle)\s+"?([^"\n{]+)"?(?:\s*\{)?' for match in re.finditer(package_pattern, code): element_type = match.group(1) name = match.group(2).strip() if not any(e.name == name for e in elements): elements.append(DiagramElement( element_type=element_type, name=name, )) return elements def parse_relationships(code: str) -> List[DiagramRelationship]: """Extract relationships from PlantUML code.""" relationships = [] # Common relationship patterns # Matches: A --> B, A --|> B, A ..> B : label, etc. rel_pattern = r'(\w+)\s*([-.<>|*o]+)\s*(\w+)(?:\s*:\s*(.+))?$' for line in code.split('\n'): line = line.strip() # Skip comments and directives if line.startswith("'") or line.startswith('@') or not line: continue match = re.match(rel_pattern, line) if match: from_el = match.group(1) rel_type = match.group(2) to_el = match.group(3) label = match.group(4) or "" # Filter out keywords that aren't elements keywords = {'class', 'interface', 'enum', 'abstract', 'package', 'component', 'participant', 'actor', 'state', 'entity', 'usecase'} if from_el.lower() in keywords or to_el.lower() in keywords: continue relationships.append(DiagramRelationship( from_element=from_el, to_element=to_el, relationship_type=rel_type, label=label.strip(), )) return relationships def get_element_names(code: str) -> List[str]: """Get just the names of all elements in the diagram.""" elements = parse_elements(code) names = [] for el in elements: names.append(el.name) if el.alias: names.append(el.alias) return names def find_element_in_code(code: str, element_name: str) -> Optional[Tuple[int, int]]: """Find the line range of an element definition in code. Returns (start_line, end_line) or None if not found. """ lines = code.split('\n') # Look for class-like definitions in_element = False start_line = None brace_count = 0 for i, line in enumerate(lines): # Check for element start if re.search(rf'\b(class|interface|enum|abstract)\s+{re.escape(element_name)}\b', line): start_line = i if '{' in line: in_element = True brace_count = line.count('{') - line.count('}') if brace_count == 0: return (start_line, i) else: return (start_line, i) continue # Check for other element types if re.search(rf'(participant|actor|component|usecase|state)\s+"?{re.escape(element_name)}"?', line): return (i, i) # Track braces if inside element if in_element: brace_count += line.count('{') - line.count('}') if brace_count <= 0: return (start_line, i) if start_line is not None: return (start_line, start_line) return None def insert_element_code(code: str, element_code: str) -> str: """Insert element code before @enduml or @endmindmap.""" # Find the end directive end_patterns = ['@enduml', '@endmindmap', '@endwbs', '@endgantt'] for pattern in end_patterns: if pattern in code.lower(): # Find position (case-insensitive) idx = code.lower().rfind(pattern) # Insert before the end directive with proper spacing return code[:idx] + element_code + '\n\n' + code[idx:] # If no end directive, append return code + '\n' + element_code def replace_element_in_code(code: str, old_name: str, new_code: str) -> str: """Replace an element definition in the code.""" location = find_element_in_code(code, old_name) if location: lines = code.split('\n') start, end = location new_lines = lines[:start] + [new_code] + lines[end + 1:] return '\n'.join(new_lines) return code def delete_element_from_code(code: str, element_name: str) -> str: """Delete an element and its relationships from code.""" location = find_element_in_code(code, element_name) if location: lines = code.split('\n') start, end = location # Remove element lines del lines[start:end + 1] # Remove relationships involving this element new_lines = [] for line in lines: # Check if line is a relationship involving this element if re.search(rf'\b{re.escape(element_name)}\b\s*[-.<>|*o]+', line): continue if re.search(rf'[-.<>|*o]+\s*\b{re.escape(element_name)}\b', line): continue new_lines.append(line) return '\n'.join(new_lines) return code