fix: use regex to extract markers anywhere in text, not just at line start

- Added DECISION_PATTERN, QUESTION_PATTERN, and ACTION_PATTERN regexes
- Support both plain (DECISION:) and markdown bold (**DECISION**:) formats
- Markers are now detected anywhere in the text, not just at line start
- Removed analysis_normalized, since the regexes handle both variants directly
- Kept legacy support for ASSIGNED: and DONE: at line start
- Updated the docstring to reflect the regex-based approach
2025-11-02 11:58:06 -04:00 · 2025-11-02 11:58:06 -04:00 · 0601117371
parent 2d53cfb09d
commit 0601117371
1 changed file with 60 additions and 43 deletions
--- a/automation/workflow.py
+++ b/automation/workflow.py
@ -33,12 +33,25 @@ DISCUSSION_SUFFIXES = (
 SUMMARY_SUFFIX = ".sum.md"
 MENTION_PATTERN = re.compile(r"@(\w+|all)")

+# Patterns to extract structured markers (support both plain and markdown bold)
+# Matches: DECISION: text, **DECISION**: text, decision: text
+DECISION_PATTERN = re.compile(r'(?:\*\*)?DECISION(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*QUESTION|\*\*ACTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
+QUESTION_PATTERN = re.compile(r'(?:\*\*)?(?:QUESTION|Q)(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*DECISION|\*\*ACTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
+ACTION_PATTERN = re.compile(r'(?:\*\*)?(?:ACTION|TODO)(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*DECISION|\*\*QUESTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
+

 def extract_structured_basic(text: str) -> dict[str, list]:
    """
-    Derive structured discussion signals using lightweight pattern matching.
+    Derive structured discussion signals using regex pattern matching.

-    Recognises explicit markers (Q:, TODO:, DONE:, DECISION:) and @mentions.
+    Recognises markers anywhere in text (not just at line start):
+    - DECISION: / **DECISION**: - Architectural/technical decisions
+    - QUESTION: / **QUESTION**: / Q: - Open questions needing answers
+    - ACTION: / **ACTION**: / TODO: - Action items with optional @assignee
+    - @mentions - References to participants
+
+    Also supports legacy line-start markers: ASSIGNED:, DONE:
+    Questions ending with '?' are auto-detected.
    """
    questions: list[dict[str, str]] = []
    action_items: list[dict[str, str]] = []
@ -66,7 +79,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
        analysis = remainder.strip() if participant else stripped
        if not analysis:
            continue
-        lowered = analysis.lower()
        participant_name = participant or "unknown"

        if timeline_data is None:
@ -75,30 +87,47 @@ def extract_structured_basic(text: str) -> dict[str, list]:
                "summary": _truncate_summary(analysis),
            }

-        # Questions
-        if lowered.startswith("q:") or lowered.startswith("question:"):
-            _, _, body = analysis.partition(":")
-            question_text = body.strip()
-            if question_text:
-                questions.append(
-                    {"participant": participant_name, "question": question_text, "status": "OPEN"}
+        # Extract decisions using regex (finds markers anywhere in line)
+        for match in DECISION_PATTERN.finditer(analysis):
+            decision_text = match.group(1).strip()
+            if decision_text:
+                decisions.append(
+                    {
+                        "participant": participant_name,
+                        "decision": decision_text,
+                        "rationale": "",
+                        "supporters": [],
+                    }
                )
-        elif analysis.endswith("?"):
-            question_text = analysis.rstrip("?").strip()
+
+        # Extract questions using regex (finds markers anywhere in line)
+        for match in QUESTION_PATTERN.finditer(analysis):
+            question_text = match.group(1).strip()
            if question_text:
                questions.append(
                    {"participant": participant_name, "question": question_text, "status": "OPEN"}
                )

-        # Action items
-        if lowered.startswith(("todo:", "action:")):
-            _, _, body = analysis.partition(":")
-            action_text = body.strip()
+        # Also catch questions that end with '?' and don't have explicit marker
+        if '?' in analysis and not QUESTION_PATTERN.search(analysis):
+            # Simple heuristic: if line ends with ?, treat as question
+            if analysis.rstrip().endswith('?'):
+                question_text = analysis.rstrip('?').strip()
+                # Avoid duplicate if already extracted
+                if question_text and not any(q['question'] == question_text for q in questions):
+                    questions.append(
+                        {"participant": participant_name, "question": question_text, "status": "OPEN"}
+                    )
+
+        # Extract action items using regex (finds markers anywhere in line)
+        for match in ACTION_PATTERN.finditer(analysis):
+            action_text = match.group(1).strip()
            if action_text:
+                # Extract assignee from the first @mention in the action text
                assignee = None
-                match = MENTION_PATTERN.search(line)
-                if match:
-                    assignee = match.group(1)
+                mention_match = MENTION_PATTERN.search(action_text)
+                if mention_match:
+                    assignee = mention_match.group(1)
                action_items.append(
                    {
                        "participant": participant_name,
@ -107,15 +136,17 @@ def extract_structured_basic(text: str) -> dict[str, list]:
                        "assignee": assignee,
                    }
                )
-        elif lowered.startswith("assigned:"):
-            _, _, body = analysis.partition(":")
-            action_text = body.strip()
+
+        # Legacy support for plain text markers at line start
+        lowered = analysis.lower()
+        if lowered.startswith("assigned:"):
+            _, _, action_text = analysis.partition(":")
+            action_text = action_text.strip()
            if action_text:
-                # Extract assignee from @mention in the line
-                assignee = participant_name  # Default to participant claiming it
-                match = MENTION_PATTERN.search(line)
-                if match:
-                    assignee = match.group(1)
+                assignee = participant_name
+                mention_match = MENTION_PATTERN.search(line)
+                if mention_match:
+                    assignee = mention_match.group(1)
                action_items.append(
                    {
                        "participant": participant_name,
@ -125,8 +156,8 @@ def extract_structured_basic(text: str) -> dict[str, list]:
                    }
                )
        elif lowered.startswith("done:"):
-            _, _, body = analysis.partition(":")
-            action_text = body.strip()
+            _, _, action_text = analysis.partition(":")
+            action_text = action_text.strip()
            if action_text:
                action_items.append(
                    {
@ -137,20 +168,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
                    }
                )

-        # Decisions
-        if lowered.startswith("decision:"):
-            _, _, body = analysis.partition(":")
-            decision_text = body.strip()
-            if decision_text:
-                decisions.append(
-                    {
-                        "participant": participant_name,
-                        "decision": decision_text,
-                        "rationale": "",
-                        "supporters": [],
-                    }
-                )
-
        # Mentions
        for match in MENTION_PATTERN.finditer(line):
            mentions.append(