fix: use regex to extract markers anywhere in text, not just at line start
- Added DECISION_PATTERN, QUESTION_PATTERN, ACTION_PATTERN regexes
- Added support for both plain (DECISION:) and markdown bold (**DECISION**:) formats
- Markers are now detected anywhere in text, not just at line start
- Removed analysis_normalized, since the regexes handle both variants directly
- Kept legacy support for ASSIGNED: and DONE: at line start
- Updated docstring to reflect the regex-based approach
This commit is contained in:
parent
2d53cfb09d
commit
0601117371
|
|
@ -33,12 +33,25 @@ DISCUSSION_SUFFIXES = (
|
||||||
SUMMARY_SUFFIX = ".sum.md"
|
SUMMARY_SUFFIX = ".sum.md"
|
||||||
MENTION_PATTERN = re.compile(r"@(\w+|all)")
|
MENTION_PATTERN = re.compile(r"@(\w+|all)")
|
||||||
|
|
||||||
|
# Patterns to extract structured markers (support both plain and markdown bold)
|
||||||
|
# Matches: DECISION: text, **DECISION**: text, decision: text
|
||||||
|
DECISION_PATTERN = re.compile(r'(?:\*\*)?DECISION(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*QUESTION|\*\*ACTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
|
||||||
|
QUESTION_PATTERN = re.compile(r'(?:\*\*)?(?:QUESTION|Q)(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*DECISION|\*\*ACTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
|
||||||
|
ACTION_PATTERN = re.compile(r'(?:\*\*)?(?:ACTION|TODO)(?:\*\*)?\s*:\s*(.+?)(?=\s*(?:\*\*DECISION|\*\*QUESTION|\*\*SUGGESTION|VOTE:)|$)', re.IGNORECASE | re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
def extract_structured_basic(text: str) -> dict[str, list]:
|
def extract_structured_basic(text: str) -> dict[str, list]:
|
||||||
"""
|
"""
|
||||||
Derive structured discussion signals using lightweight pattern matching.
|
Derive structured discussion signals using regex pattern matching.
|
||||||
|
|
||||||
Recognises explicit markers (Q:, TODO:, DONE:, DECISION:) and @mentions.
|
Recognises markers anywhere in text (not just at line start):
|
||||||
|
- DECISION: / **DECISION**: - Architectural/technical decisions
|
||||||
|
- QUESTION: / **QUESTION**: / Q: - Open questions needing answers
|
||||||
|
- ACTION: / **ACTION**: / TODO: - Action items with optional @assignee
|
||||||
|
- @mentions - References to participants
|
||||||
|
|
||||||
|
Also supports legacy line-start markers: ASSIGNED:, DONE:
|
||||||
|
Questions ending with '?' are auto-detected.
|
||||||
"""
|
"""
|
||||||
questions: list[dict[str, str]] = []
|
questions: list[dict[str, str]] = []
|
||||||
action_items: list[dict[str, str]] = []
|
action_items: list[dict[str, str]] = []
|
||||||
|
|
@ -66,7 +79,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
||||||
analysis = remainder.strip() if participant else stripped
|
analysis = remainder.strip() if participant else stripped
|
||||||
if not analysis:
|
if not analysis:
|
||||||
continue
|
continue
|
||||||
lowered = analysis.lower()
|
|
||||||
participant_name = participant or "unknown"
|
participant_name = participant or "unknown"
|
||||||
|
|
||||||
if timeline_data is None:
|
if timeline_data is None:
|
||||||
|
|
@ -75,30 +87,47 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
||||||
"summary": _truncate_summary(analysis),
|
"summary": _truncate_summary(analysis),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Questions
|
# Extract decisions using regex (finds markers anywhere in line)
|
||||||
if lowered.startswith("q:") or lowered.startswith("question:"):
|
for match in DECISION_PATTERN.finditer(analysis):
|
||||||
_, _, body = analysis.partition(":")
|
decision_text = match.group(1).strip()
|
||||||
question_text = body.strip()
|
if decision_text:
|
||||||
if question_text:
|
decisions.append(
|
||||||
questions.append(
|
{
|
||||||
{"participant": participant_name, "question": question_text, "status": "OPEN"}
|
"participant": participant_name,
|
||||||
|
"decision": decision_text,
|
||||||
|
"rationale": "",
|
||||||
|
"supporters": [],
|
||||||
|
}
|
||||||
)
|
)
|
||||||
elif analysis.endswith("?"):
|
|
||||||
question_text = analysis.rstrip("?").strip()
|
# Extract questions using regex (finds markers anywhere in line)
|
||||||
|
for match in QUESTION_PATTERN.finditer(analysis):
|
||||||
|
question_text = match.group(1).strip()
|
||||||
if question_text:
|
if question_text:
|
||||||
questions.append(
|
questions.append(
|
||||||
{"participant": participant_name, "question": question_text, "status": "OPEN"}
|
{"participant": participant_name, "question": question_text, "status": "OPEN"}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Action items
|
# Also catch questions that end with '?' and don't have explicit marker
|
||||||
if lowered.startswith(("todo:", "action:")):
|
if '?' in analysis and not QUESTION_PATTERN.search(analysis):
|
||||||
_, _, body = analysis.partition(":")
|
# Simple heuristic: if line ends with ?, treat as question
|
||||||
action_text = body.strip()
|
if analysis.rstrip().endswith('?'):
|
||||||
|
question_text = analysis.rstrip('?').strip()
|
||||||
|
# Avoid duplicate if already extracted
|
||||||
|
if question_text and not any(q['question'] == question_text for q in questions):
|
||||||
|
questions.append(
|
||||||
|
{"participant": participant_name, "question": question_text, "status": "OPEN"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract action items using regex (finds markers anywhere in line)
|
||||||
|
for match in ACTION_PATTERN.finditer(analysis):
|
||||||
|
action_text = match.group(1).strip()
|
||||||
if action_text:
|
if action_text:
|
||||||
|
# Extract assignee from the first @mention inside the action text
|
||||||
assignee = None
|
assignee = None
|
||||||
match = MENTION_PATTERN.search(line)
|
mention_match = MENTION_PATTERN.search(action_text)
|
||||||
if match:
|
if mention_match:
|
||||||
assignee = match.group(1)
|
assignee = mention_match.group(1)
|
||||||
action_items.append(
|
action_items.append(
|
||||||
{
|
{
|
||||||
"participant": participant_name,
|
"participant": participant_name,
|
||||||
|
|
@ -107,15 +136,17 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
||||||
"assignee": assignee,
|
"assignee": assignee,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
elif lowered.startswith("assigned:"):
|
|
||||||
_, _, body = analysis.partition(":")
|
# Legacy support for plain text markers at line start
|
||||||
action_text = body.strip()
|
lowered = analysis.lower()
|
||||||
|
if lowered.startswith("assigned:"):
|
||||||
|
_, _, action_text = analysis.partition(":")
|
||||||
|
action_text = action_text.strip()
|
||||||
if action_text:
|
if action_text:
|
||||||
# Extract assignee from @mention in the line
|
assignee = participant_name
|
||||||
assignee = participant_name # Default to participant claiming it
|
mention_match = MENTION_PATTERN.search(line)
|
||||||
match = MENTION_PATTERN.search(line)
|
if mention_match:
|
||||||
if match:
|
assignee = mention_match.group(1)
|
||||||
assignee = match.group(1)
|
|
||||||
action_items.append(
|
action_items.append(
|
||||||
{
|
{
|
||||||
"participant": participant_name,
|
"participant": participant_name,
|
||||||
|
|
@ -125,8 +156,8 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
elif lowered.startswith("done:"):
|
elif lowered.startswith("done:"):
|
||||||
_, _, body = analysis.partition(":")
|
_, _, action_text = analysis.partition(":")
|
||||||
action_text = body.strip()
|
action_text = action_text.strip()
|
||||||
if action_text:
|
if action_text:
|
||||||
action_items.append(
|
action_items.append(
|
||||||
{
|
{
|
||||||
|
|
@ -137,20 +168,6 @@ def extract_structured_basic(text: str) -> dict[str, list]:
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Decisions
|
|
||||||
if lowered.startswith("decision:"):
|
|
||||||
_, _, body = analysis.partition(":")
|
|
||||||
decision_text = body.strip()
|
|
||||||
if decision_text:
|
|
||||||
decisions.append(
|
|
||||||
{
|
|
||||||
"participant": participant_name,
|
|
||||||
"decision": decision_text,
|
|
||||||
"rationale": "",
|
|
||||||
"supporters": [],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Mentions
|
# Mentions
|
||||||
for match in MENTION_PATTERN.finditer(line):
|
for match in MENTION_PATTERN.finditer(line):
|
||||||
mentions.append(
|
mentions.append(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue