fix: use regex to extract markers anywhere in text, not just at line start

- Added DECISION_PATTERN, QUESTION_PATTERN, ACTION_PATTERN regexes
- Support both plain (DECISION:) and markdown bold (**DECISION**:) formats
- Markers now detected anywhere in text, not just at line start
- Removed analysis_normalized since regex handles both variants directly
- Kept legacy support for ASSIGNED: and DONE: at line start
- Updated docstring to reflect regex-based approach
This commit is contained in:
rob 2025-11-02 11:58:06 -04:00
parent 2d53cfb09d
commit 0601117371
1 changed file with 60 additions and 43 deletions

View File

@ -33,12 +33,25 @@ DISCUSSION_SUFFIXES = (
SUMMARY_SUFFIX = ".sum.md" SUMMARY_SUFFIX = ".sum.md"
MENTION_PATTERN = re.compile(r"@(\w+|all)") MENTION_PATTERN = re.compile(r"@(\w+|all)")
# Patterns to extract structured markers anywhere in a line of text.
# Every marker is accepted in plain (DECISION:) or markdown-bold (**DECISION**:)
# form and is matched case-insensitively (decision: works too).
#
# Two rules keep the patterns from over-matching:
#   * A marker must not be glued to a preceding word character, so "reaction:"
#     is not an ACTION and "FAQ:" is not a Q.
#   * Captured text stops before the next marker of any kind (plain or bold),
#     before a VOTE: tag, or at the end of the text, so several markers on one
#     line are extracted separately instead of swallowing each other.
#
# Each pattern exposes the marker's text as group(1); callers are expected to
# strip() it, since trailing whitespace before end-of-text is included.
_MARKER_END = (
    r'(?=\s*(?<!\w)(?:'
    r'(?:\*\*)?(?:DECISION|QUESTION|Q|ACTION|TODO|SUGGESTION)(?:\*\*)?\s*:'
    r'|VOTE:'
    r')|$)'
)
# Matches: DECISION: text, **DECISION**: text, decision: text
DECISION_PATTERN = re.compile(
    r'(?<!\w)(?:\*\*)?DECISION(?:\*\*)?\s*:\s*(.+?)' + _MARKER_END,
    re.IGNORECASE | re.DOTALL,
)
# Matches: QUESTION: text, **QUESTION**: text, Q: text
QUESTION_PATTERN = re.compile(
    r'(?<!\w)(?:\*\*)?(?:QUESTION|Q)(?:\*\*)?\s*:\s*(.+?)' + _MARKER_END,
    re.IGNORECASE | re.DOTALL,
)
# Matches: ACTION: text, **ACTION**: text, TODO: text
ACTION_PATTERN = re.compile(
    r'(?<!\w)(?:\*\*)?(?:ACTION|TODO)(?:\*\*)?\s*:\s*(.+?)' + _MARKER_END,
    re.IGNORECASE | re.DOTALL,
)
def extract_structured_basic(text: str) -> dict[str, list]: def extract_structured_basic(text: str) -> dict[str, list]:
""" """
Derive structured discussion signals using lightweight pattern matching. Derive structured discussion signals using regex pattern matching.
Recognises explicit markers (Q:, TODO:, DONE:, DECISION:) and @mentions. Recognises markers anywhere in text (not just at line start):
- DECISION: / **DECISION**: - Architectural/technical decisions
- QUESTION: / **QUESTION**: / Q: - Open questions needing answers
- ACTION: / **ACTION**: / TODO: - Action items with optional @assignee
- @mentions - References to participants
Also supports legacy line-start markers: ASSIGNED:, DONE:
Questions ending with '?' are auto-detected.
""" """
questions: list[dict[str, str]] = [] questions: list[dict[str, str]] = []
action_items: list[dict[str, str]] = [] action_items: list[dict[str, str]] = []
@ -66,7 +79,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
analysis = remainder.strip() if participant else stripped analysis = remainder.strip() if participant else stripped
if not analysis: if not analysis:
continue continue
lowered = analysis.lower()
participant_name = participant or "unknown" participant_name = participant or "unknown"
if timeline_data is None: if timeline_data is None:
@ -75,30 +87,47 @@ def extract_structured_basic(text: str) -> dict[str, list]:
"summary": _truncate_summary(analysis), "summary": _truncate_summary(analysis),
} }
# Questions # Extract decisions using regex (finds markers anywhere in line)
if lowered.startswith("q:") or lowered.startswith("question:"): for match in DECISION_PATTERN.finditer(analysis):
_, _, body = analysis.partition(":") decision_text = match.group(1).strip()
question_text = body.strip() if decision_text:
if question_text: decisions.append(
questions.append( {
{"participant": participant_name, "question": question_text, "status": "OPEN"} "participant": participant_name,
"decision": decision_text,
"rationale": "",
"supporters": [],
}
) )
elif analysis.endswith("?"):
question_text = analysis.rstrip("?").strip() # Extract questions using regex (finds markers anywhere in line)
for match in QUESTION_PATTERN.finditer(analysis):
question_text = match.group(1).strip()
if question_text: if question_text:
questions.append( questions.append(
{"participant": participant_name, "question": question_text, "status": "OPEN"} {"participant": participant_name, "question": question_text, "status": "OPEN"}
) )
# Action items # Also catch questions that end with '?' and don't have explicit marker
if lowered.startswith(("todo:", "action:")): if '?' in analysis and not QUESTION_PATTERN.search(analysis):
_, _, body = analysis.partition(":") # Simple heuristic: if line ends with ?, treat as question
action_text = body.strip() if analysis.rstrip().endswith('?'):
question_text = analysis.rstrip('?').strip()
# Avoid duplicate if already extracted
if question_text and not any(q['question'] == question_text for q in questions):
questions.append(
{"participant": participant_name, "question": question_text, "status": "OPEN"}
)
# Extract action items using regex (finds markers anywhere in line)
for match in ACTION_PATTERN.finditer(analysis):
action_text = match.group(1).strip()
if action_text: if action_text:
# Extract assignee from @mention in the line
assignee = None assignee = None
match = MENTION_PATTERN.search(line) mention_match = MENTION_PATTERN.search(action_text)
if match: if mention_match:
assignee = match.group(1) assignee = mention_match.group(1)
action_items.append( action_items.append(
{ {
"participant": participant_name, "participant": participant_name,
@ -107,15 +136,17 @@ def extract_structured_basic(text: str) -> dict[str, list]:
"assignee": assignee, "assignee": assignee,
} }
) )
elif lowered.startswith("assigned:"):
_, _, body = analysis.partition(":") # Legacy support for plain text markers at line start
action_text = body.strip() lowered = analysis.lower()
if lowered.startswith("assigned:"):
_, _, action_text = analysis.partition(":")
action_text = action_text.strip()
if action_text: if action_text:
# Extract assignee from @mention in the line assignee = participant_name
assignee = participant_name # Default to participant claiming it mention_match = MENTION_PATTERN.search(line)
match = MENTION_PATTERN.search(line) if mention_match:
if match: assignee = mention_match.group(1)
assignee = match.group(1)
action_items.append( action_items.append(
{ {
"participant": participant_name, "participant": participant_name,
@ -125,8 +156,8 @@ def extract_structured_basic(text: str) -> dict[str, list]:
} }
) )
elif lowered.startswith("done:"): elif lowered.startswith("done:"):
_, _, body = analysis.partition(":") _, _, action_text = analysis.partition(":")
action_text = body.strip() action_text = action_text.strip()
if action_text: if action_text:
action_items.append( action_items.append(
{ {
@ -137,20 +168,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
} }
) )
# Decisions
if lowered.startswith("decision:"):
_, _, body = analysis.partition(":")
decision_text = body.strip()
if decision_text:
decisions.append(
{
"participant": participant_name,
"decision": decision_text,
"rationale": "",
"supporters": [],
}
)
# Mentions # Mentions
for match in MENTION_PATTERN.finditer(line): for match in MENTION_PATTERN.finditer(line):
mentions.append( mentions.append(