fix: use regex to extract markers anywhere in text, not just at line start

- Added DECISION_PATTERN, QUESTION_PATTERN, ACTION_PATTERN regexes
- Support both plain (DECISION:) and markdown bold (**DECISION**:) formats
- Markers now detected anywhere in text, not just at line start
- Removed analysis_normalized since regex handles both variants directly
- Kept legacy support for ASSIGNED: and DONE: at line start
- Updated docstring to reflect regex-based approach
2025-11-02 11:58:06 -04:00 · 2025-11-02 11:58:06 -04:00 · 0601117371
parent 2d53cfb09d
commit 0601117371
1 changed file with 60 additions and 43 deletions
--- a/automation/workflow.py
+++ b/automation/workflow.py
@ -33,12 +33,25 @@ DISCUSSION_SUFFIXES = (
 SUMMARY_SUFFIX = ".sum.md"
 MENTION_PATTERN = re.compile(r"@(\w+|all)")
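 # Patterns to extract structured markers (support both plain and markdown bold, case-insensitive)
 # Matches: DECISION: text, **DECISION**: text, decision: text
 # Each capture ends at the next bold marker of a different kind, at a VOTE: marker, or at end of text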
 DECISION_PATTERN = re.compile(r'(?:\*\*)?DECISION(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*QUESTION|\*\*ACTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
 QUESTION_PATTERN = re.compile(r'(?:\*\*)?(?:QUESTION|Q)(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*DECISION|\*\*ACTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
 ACTION_PATTERN = re.compile(r'(?:\*\*)?(?:ACTION|TODO)(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*DECISION|\*\*QUESTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
 def extract_structured_basic(text: str) -> dict[str, list]:
    """
-    Derive structured discussion signals using lightweight pattern matching.
+    Derive structured discussion signals using regex pattern matching.
-    Recognises explicit markers (Q:, TODO:, DONE:, DECISION:) and @mentions.
+    Recognises markers anywhere in text (not just at line start):
    - DECISION: / **DECISION**: - Architectural/technical decisions
    - QUESTION: / **QUESTION**: / Q: - Open questions needing answers
    - ACTION: / **ACTION**: / TODO: - Action items with optional @assignee
    - @mentions - References to participants
    Also supports legacy line-start markers: ASSIGNED:, DONE:
    Questions ending with '?' are auto-detected.
    """
    questions: list[dict[str, str]] = []
    action_items: list[dict[str, str]] = []
@ -66,7 +79,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
        analysis = remainder.strip() if participant else stripped
        if not analysis:
            continue
        lowered = analysis.lower()
        participant_name = participant or "unknown"
        if timeline_data is None:
@ -75,30 +87,47 @@ def extract_structured_basic(text: str) -> dict[str, list]:
                "summary": _truncate_summary(analysis),
            }
-        # Questions
+        # Extract decisions using regex (finds markers anywhere in line)
-        if lowered.startswith("q:") or lowered.startswith("question:"):
+        for match in DECISION_PATTERN.finditer(analysis):
-            _, _, body = analysis.partition(":")
+            decision_text = match.group(1).strip()
-            question_text = body.strip()
+            if decision_text:
-            if question_text:
+                decisions.append(
-                questions.append(
+                    {
-                    {"participant": participant_name, "question": question_text, "status": "OPEN"}
+                        "participant": participant_name,
                        "decision": decision_text,
                        "rationale": "",
                        "supporters": [],
                    }
                )
-        elif analysis.endswith("?"):
+
-            question_text = analysis.rstrip("?").strip()
+        # Extract questions using regex (finds markers anywhere in line)
        for match in QUESTION_PATTERN.finditer(analysis):
            question_text = match.group(1).strip()
            if question_text:
                questions.append(
                    {"participant": participant_name, "question": question_text, "status": "OPEN"}
                )
-        # Action items
+        # Also catch questions that end with '?' and don't have explicit marker
-        if lowered.startswith(("todo:", "action:")):
+        if '?' in analysis and not QUESTION_PATTERN.search(analysis):
-            _, _, body = analysis.partition(":")
+            # Simple heuristic: if line ends with ?, treat as question
-            action_text = body.strip()
+            if analysis.rstrip().endswith('?'):
                question_text = analysis.rstrip('?').strip()
                # Avoid duplicate if already extracted
                if question_text and not any(q['question'] == question_text for q in questions):
                    questions.append(
                        {"participant": participant_name, "question": question_text, "status": "OPEN"}
                    )
        # Extract action items using regex (finds markers anywhere in line)
        for match in ACTION_PATTERN.finditer(analysis):
            action_text = match.group(1).strip()
            if action_text:
                # Extract assignee from @mention in the line
                assignee = None
-                match = MENTION_PATTERN.search(line)
+                mention_match = MENTION_PATTERN.search(action_text)
-                if match:
+                if mention_match:
-                    assignee = match.group(1)
+                    assignee = mention_match.group(1)
                action_items.append(
                    {
                        "participant": participant_name,
@ -107,15 +136,17 @@ def extract_structured_basic(text: str) -> dict[str, list]:
                        "assignee": assignee,
                    }
                )
-        elif lowered.startswith("assigned:"):
+
-            _, _, body = analysis.partition(":")
+        # Legacy support for plain text markers at line start
-            action_text = body.strip()
+        lowered = analysis.lower()
        if lowered.startswith("assigned:"):
            _, _, action_text = analysis.partition(":")
            action_text = action_text.strip()
            if action_text:
-                # Extract assignee from @mention in the line
+                assignee = participant_name
-                assignee = participant_name  # Default to participant claiming it
+                mention_match = MENTION_PATTERN.search(line)
-                match = MENTION_PATTERN.search(line)
+                if mention_match:
-                if match:
+                    assignee = mention_match.group(1)
                    assignee = match.group(1)
                action_items.append(
                    {
                        "participant": participant_name,
@ -125,8 +156,8 @@ def extract_structured_basic(text: str) -> dict[str, list]:
                    }
                )
        elif lowered.startswith("done:"):
-            _, _, body = analysis.partition(":")
+            _, _, action_text = analysis.partition(":")
-            action_text = body.strip()
+            action_text = action_text.strip()
            if action_text:
                action_items.append(
                    {
@ -137,20 +168,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
                    }
                )
        # Decisions
        if lowered.startswith("decision:"):
            _, _, body = analysis.partition(":")
            decision_text = body.strip()
            if decision_text:
                decisions.append(
                    {
                        "participant": participant_name,
                        "decision": decision_text,
                        "rationale": "",
                        "supporters": [],
                    }
                )
        # Mentions
        for match in MENTION_PATTERN.finditer(line):
            mentions.append(