fix: use regex to extract markers anywhere in text, not just at line start

- Added DECISION_PATTERN, QUESTION_PATTERN, ACTION_PATTERN regexes
- Support both plain (DECISION:) and markdown bold (**DECISION**:) formats
- Markers now detected anywhere in text, not just at line start
- Removed analysis_normalized since regex handles both variants directly
- Kept legacy support for ASSIGNED: and DONE: at line start
- Updated docstring to reflect regex-based approach
This commit is contained in:
rob 2025-11-02 11:58:06 -04:00
parent 2d53cfb09d
commit 0601117371
1 changed file with 60 additions and 43 deletions

View File

@@ -33,12 +33,25 @@ DISCUSSION_SUFFIXES = (
SUMMARY_SUFFIX = ".sum.md"
MENTION_PATTERN = re.compile(r"@(\w+|all)")
# Patterns to extract structured markers (support both plain and markdown bold).
# Matches: DECISION: text, **DECISION**: text, decision: text
# The leading (?<!\w) stops a marker from matching inside a longer word, so
# "transaction:", "FAQ:" and "indecision:" are NOT treated as markers.
# Group 1 is the marker body: it runs lazily up to the next bold marker
# (**DECISION / **QUESTION / **ACTION / **SUGGESTION), a "VOTE:" tag, or the
# end of the searched string, whichever comes first.
DECISION_PATTERN = re.compile(r'(?<!\w)(?:\*\*)?DECISION(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*QUESTION|\*\*ACTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
QUESTION_PATTERN = re.compile(r'(?<!\w)(?:\*\*)?(?:QUESTION|Q)(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*DECISION|\*\*ACTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
ACTION_PATTERN = re.compile(r'(?<!\w)(?:\*\*)?(?:ACTION|TODO)(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*DECISION|\*\*QUESTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
def extract_structured_basic(text: str) -> dict[str, list]:
"""
Derive structured discussion signals using lightweight pattern matching.
Derive structured discussion signals using regex pattern matching.
Recognises explicit markers (Q:, TODO:, DONE:, DECISION:) and @mentions.
Recognises markers anywhere in text (not just at line start):
- DECISION: / **DECISION**: - Architectural/technical decisions
- QUESTION: / **QUESTION**: / Q: - Open questions needing answers
- ACTION: / **ACTION**: / TODO: - Action items with optional @assignee
- @mentions - References to participants
Also supports legacy line-start markers: ASSIGNED:, DONE:
Questions ending with '?' are auto-detected.
"""
questions: list[dict[str, str]] = []
action_items: list[dict[str, str]] = []
@@ -66,7 +79,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
analysis = remainder.strip() if participant else stripped
if not analysis:
continue
lowered = analysis.lower()
participant_name = participant or "unknown"
if timeline_data is None:
@@ -75,30 +87,47 @@ def extract_structured_basic(text: str) -> dict[str, list]:
"summary": _truncate_summary(analysis),
}
# Questions
if lowered.startswith("q:") or lowered.startswith("question:"):
_, _, body = analysis.partition(":")
question_text = body.strip()
if question_text:
questions.append(
{"participant": participant_name, "question": question_text, "status": "OPEN"}
# Extract decisions using regex (finds markers anywhere in line)
for match in DECISION_PATTERN.finditer(analysis):
decision_text = match.group(1).strip()
if decision_text:
decisions.append(
{
"participant": participant_name,
"decision": decision_text,
"rationale": "",
"supporters": [],
}
)
elif analysis.endswith("?"):
question_text = analysis.rstrip("?").strip()
# Extract questions using regex (finds markers anywhere in line)
for match in QUESTION_PATTERN.finditer(analysis):
question_text = match.group(1).strip()
if question_text:
questions.append(
{"participant": participant_name, "question": question_text, "status": "OPEN"}
)
# Action items
if lowered.startswith(("todo:", "action:")):
_, _, body = analysis.partition(":")
action_text = body.strip()
# Also catch questions that end with '?' and don't have explicit marker
if '?' in analysis and not QUESTION_PATTERN.search(analysis):
# Simple heuristic: if line ends with ?, treat as question
if analysis.rstrip().endswith('?'):
question_text = analysis.rstrip('?').strip()
# Avoid duplicate if already extracted
if question_text and not any(q['question'] == question_text for q in questions):
questions.append(
{"participant": participant_name, "question": question_text, "status": "OPEN"}
)
# Extract action items using regex (finds markers anywhere in line)
for match in ACTION_PATTERN.finditer(analysis):
action_text = match.group(1).strip()
if action_text:
# Extract assignee from @mention in the line
assignee = None
match = MENTION_PATTERN.search(line)
if match:
assignee = match.group(1)
mention_match = MENTION_PATTERN.search(action_text)
if mention_match:
assignee = mention_match.group(1)
action_items.append(
{
"participant": participant_name,
@@ -107,15 +136,17 @@ def extract_structured_basic(text: str) -> dict[str, list]:
"assignee": assignee,
}
)
elif lowered.startswith("assigned:"):
_, _, body = analysis.partition(":")
action_text = body.strip()
# Legacy support for plain text markers at line start
lowered = analysis.lower()
if lowered.startswith("assigned:"):
_, _, action_text = analysis.partition(":")
action_text = action_text.strip()
if action_text:
# Extract assignee from @mention in the line
assignee = participant_name # Default to participant claiming it
match = MENTION_PATTERN.search(line)
if match:
assignee = match.group(1)
assignee = participant_name
mention_match = MENTION_PATTERN.search(line)
if mention_match:
assignee = mention_match.group(1)
action_items.append(
{
"participant": participant_name,
@@ -125,8 +156,8 @@ def extract_structured_basic(text: str) -> dict[str, list]:
}
)
elif lowered.startswith("done:"):
_, _, body = analysis.partition(":")
action_text = body.strip()
_, _, action_text = analysis.partition(":")
action_text = action_text.strip()
if action_text:
action_items.append(
{
@@ -137,20 +168,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
}
)
# Decisions
if lowered.startswith("decision:"):
_, _, body = analysis.partition(":")
decision_text = body.strip()
if decision_text:
decisions.append(
{
"participant": participant_name,
"decision": decision_text,
"rationale": "",
"supporters": [],
}
)
# Mentions
for match in MENTION_PATTERN.finditer(line):
mentions.append(