fix: use regex to extract markers anywhere in text, not just at line start
- Added DECISION_PATTERN, QUESTION_PATTERN, and ACTION_PATTERN regexes
- Support both plain (DECISION:) and markdown bold (**DECISION**:) formats
- Markers are now detected anywhere in the text, not just at line start
- Removed analysis_normalized, since the regexes handle both variants directly
- Kept legacy support for ASSIGNED: and DONE: at line start
- Updated the docstring to reflect the regex-based approach
This commit is contained in:
parent
2d53cfb09d
commit
0601117371
|
|
@ -33,12 +33,25 @@ DISCUSSION_SUFFIXES = (
|
|||
SUMMARY_SUFFIX = ".sum.md"
|
||||
MENTION_PATTERN = re.compile(r"@(\w+|all)")
|
||||
|
||||
# Patterns to extract structured markers (support both plain and markdown bold)
|
||||
# Matches: DECISION: text, **DECISION**: text, decision: text
|
||||
# DECISION marker, plain or markdown bold, anywhere in the text (case-insensitive).
# - (?<!\w) stops the keyword from matching inside a longer word ("indecision:").
# - Group 1 is the decision text; it ends at the next structured marker
#   (bold "**KEYWORD" or plain "KEYWORD:") or at the end of the text, so that
#   several markers on one line are extracted separately.
DECISION_PATTERN = re.compile(
    r'(?<!\w)(?:\*\*)?DECISION(?:\*\*)?\s*:\s*(.+?)'
    r'(?=\s*(?:\*\*(?:DECISION|QUESTION|ACTION|SUGGESTION)'
    r'|(?<!\w)(?:DECISION|QUESTION|Q|ACTION|TODO|SUGGESTION|VOTE)\s*:)|$)',
    re.IGNORECASE | re.DOTALL,
)
|
||||
# QUESTION / Q marker, plain or markdown bold, anywhere in the text (case-insensitive).
# - (?<!\w) stops the short "Q" alternative from matching inside words ("FAQ:").
# - Group 1 is the question text; it ends at the next structured marker
#   (bold "**KEYWORD" or plain "KEYWORD:") or at the end of the text.
QUESTION_PATTERN = re.compile(
    r'(?<!\w)(?:\*\*)?(?:QUESTION|Q)(?:\*\*)?\s*:\s*(.+?)'
    r'(?=\s*(?:\*\*(?:DECISION|QUESTION|ACTION|SUGGESTION)'
    r'|(?<!\w)(?:DECISION|QUESTION|Q|ACTION|TODO|SUGGESTION|VOTE)\s*:)|$)',
    re.IGNORECASE | re.DOTALL,
)
|
||||
# ACTION / TODO marker, plain or markdown bold, anywhere in the text (case-insensitive).
# - (?<!\w) stops the keyword from matching inside longer words
#   ("transaction:", "reaction:").
# - Group 1 is the action text; it ends at the next structured marker
#   (bold "**KEYWORD" or plain "KEYWORD:") or at the end of the text.
ACTION_PATTERN = re.compile(
    r'(?<!\w)(?:\*\*)?(?:ACTION|TODO)(?:\*\*)?\s*:\s*(.+?)'
    r'(?=\s*(?:\*\*(?:DECISION|QUESTION|ACTION|SUGGESTION)'
    r'|(?<!\w)(?:DECISION|QUESTION|Q|ACTION|TODO|SUGGESTION|VOTE)\s*:)|$)',
    re.IGNORECASE | re.DOTALL,
)
|
||||
|
||||
|
||||
def extract_structured_basic(text: str) -> dict[str, list]:
|
||||
"""
|
||||
Derive structured discussion signals using lightweight pattern matching.
|
||||
Derive structured discussion signals using regex pattern matching.
|
||||
|
||||
Recognises explicit markers (Q:, TODO:, DONE:, DECISION:) and @mentions.
|
||||
Recognises markers anywhere in text (not just at line start):
|
||||
- DECISION: / **DECISION**: - Architectural/technical decisions
|
||||
- QUESTION: / **QUESTION**: / Q: - Open questions needing answers
|
||||
- ACTION: / **ACTION**: / TODO: - Action items with optional @assignee
|
||||
- @mentions - References to participants
|
||||
|
||||
Also supports legacy line-start markers: ASSIGNED:, DONE:
|
||||
Questions ending with '?' are auto-detected.
|
||||
"""
|
||||
questions: list[dict[str, str]] = []
|
||||
action_items: list[dict[str, str]] = []
|
||||
|
|
@ -66,7 +79,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
|||
analysis = remainder.strip() if participant else stripped
|
||||
if not analysis:
|
||||
continue
|
||||
lowered = analysis.lower()
|
||||
participant_name = participant or "unknown"
|
||||
|
||||
if timeline_data is None:
|
||||
|
|
@ -75,30 +87,47 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
|||
"summary": _truncate_summary(analysis),
|
||||
}
|
||||
|
||||
# Questions
|
||||
if lowered.startswith("q:") or lowered.startswith("question:"):
|
||||
_, _, body = analysis.partition(":")
|
||||
question_text = body.strip()
|
||||
if question_text:
|
||||
questions.append(
|
||||
{"participant": participant_name, "question": question_text, "status": "OPEN"}
|
||||
# Extract decisions using regex (finds markers anywhere in line)
|
||||
for match in DECISION_PATTERN.finditer(analysis):
|
||||
decision_text = match.group(1).strip()
|
||||
if decision_text:
|
||||
decisions.append(
|
||||
{
|
||||
"participant": participant_name,
|
||||
"decision": decision_text,
|
||||
"rationale": "",
|
||||
"supporters": [],
|
||||
}
|
||||
)
|
||||
elif analysis.endswith("?"):
|
||||
question_text = analysis.rstrip("?").strip()
|
||||
|
||||
# Extract questions using regex (finds markers anywhere in line)
|
||||
for match in QUESTION_PATTERN.finditer(analysis):
|
||||
question_text = match.group(1).strip()
|
||||
if question_text:
|
||||
questions.append(
|
||||
{"participant": participant_name, "question": question_text, "status": "OPEN"}
|
||||
)
|
||||
|
||||
# Action items
|
||||
if lowered.startswith(("todo:", "action:")):
|
||||
_, _, body = analysis.partition(":")
|
||||
action_text = body.strip()
|
||||
# Also catch questions that end with '?' and don't have explicit marker
|
||||
if '?' in analysis and not QUESTION_PATTERN.search(analysis):
|
||||
# Simple heuristic: if line ends with ?, treat as question
|
||||
if analysis.rstrip().endswith('?'):
|
||||
question_text = analysis.rstrip('?').strip()
|
||||
# Avoid duplicate if already extracted
|
||||
if question_text and not any(q['question'] == question_text for q in questions):
|
||||
questions.append(
|
||||
{"participant": participant_name, "question": question_text, "status": "OPEN"}
|
||||
)
|
||||
|
||||
# Extract action items using regex (finds markers anywhere in line)
|
||||
for match in ACTION_PATTERN.finditer(analysis):
|
||||
action_text = match.group(1).strip()
|
||||
if action_text:
|
||||
# Extract assignee from @mention in the line
|
||||
assignee = None
|
||||
match = MENTION_PATTERN.search(line)
|
||||
if match:
|
||||
assignee = match.group(1)
|
||||
mention_match = MENTION_PATTERN.search(action_text)
|
||||
if mention_match:
|
||||
assignee = mention_match.group(1)
|
||||
action_items.append(
|
||||
{
|
||||
"participant": participant_name,
|
||||
|
|
@ -107,15 +136,17 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
|||
"assignee": assignee,
|
||||
}
|
||||
)
|
||||
elif lowered.startswith("assigned:"):
|
||||
_, _, body = analysis.partition(":")
|
||||
action_text = body.strip()
|
||||
|
||||
# Legacy support for plain text markers at line start
|
||||
lowered = analysis.lower()
|
||||
if lowered.startswith("assigned:"):
|
||||
_, _, action_text = analysis.partition(":")
|
||||
action_text = action_text.strip()
|
||||
if action_text:
|
||||
# Extract assignee from @mention in the line
|
||||
assignee = participant_name # Default to participant claiming it
|
||||
match = MENTION_PATTERN.search(line)
|
||||
if match:
|
||||
assignee = match.group(1)
|
||||
assignee = participant_name
|
||||
mention_match = MENTION_PATTERN.search(line)
|
||||
if mention_match:
|
||||
assignee = mention_match.group(1)
|
||||
action_items.append(
|
||||
{
|
||||
"participant": participant_name,
|
||||
|
|
@ -125,8 +156,8 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
|||
}
|
||||
)
|
||||
elif lowered.startswith("done:"):
|
||||
_, _, body = analysis.partition(":")
|
||||
action_text = body.strip()
|
||||
_, _, action_text = analysis.partition(":")
|
||||
action_text = action_text.strip()
|
||||
if action_text:
|
||||
action_items.append(
|
||||
{
|
||||
|
|
@ -137,20 +168,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
|||
}
|
||||
)
|
||||
|
||||
# Decisions
|
||||
if lowered.startswith("decision:"):
|
||||
_, _, body = analysis.partition(":")
|
||||
decision_text = body.strip()
|
||||
if decision_text:
|
||||
decisions.append(
|
||||
{
|
||||
"participant": participant_name,
|
||||
"decision": decision_text,
|
||||
"rationale": "",
|
||||
"supporters": [],
|
||||
}
|
||||
)
|
||||
|
||||
# Mentions
|
||||
for match in MENTION_PATTERN.finditer(line):
|
||||
mentions.append(
|
||||
|
|
|
|||
Loading…
Reference in New Issue