# Source listing: orchestrated-discussions/smarttools/discussion-parser/config.yaml
# (YAML; originally listed as 111 lines, 3.5 KiB)

# discussion-parser - Extract structured data from discussion markdown
# Usage: cat discussion.md | discussion-parser | jq .
name: discussion-parser
description: Parse discussion markdown into structured JSON
category: Discussion
steps:
- type: code
  code: |
    # Parse a discussion markdown document into a JSON summary:
    # metadata headers, per-author comment blocks with votes and markers,
    # document-wide markers, and a vote tally.
    #
    # `input` is used as a value, not called: presumably the step runner
    # binds it to the piped-in document text (verify against the runner).
    # The JSON text is left in `parsed`, which `output_var` names below.
    import re
    import json
    from collections import Counter

    content = input

    # Line-anchored marker patterns, compiled once so the per-comment pass
    # and the whole-document pass always use identical rules.
    marker_flags = re.MULTILINE | re.IGNORECASE
    question_re = re.compile(r'^(?:Q|QUESTION):\s*(.+)$', marker_flags)
    concern_re = re.compile(r'^CONCERN:\s*(.+)$', marker_flags)
    decision_re = re.compile(r'^DECISION:\s*(.+)$', marker_flags)
    todo_re = re.compile(r'^(?:TODO|ACTION):\s*(.+)$', marker_flags)
    vote_re = re.compile(r'^VOTE:\s*(READY|CHANGES|REJECT)\s*$', marker_flags)
    mention_re = re.compile(r'@(\w+)')

    # Metadata headers look like `<!-- Key: value -->`; keys are lowercased.
    metadata = {}
    for match in re.finditer(r'<!--\s*(\w+):\s*(.+?)\s*-->', content):
        key = match.group(1).lower()
        value = match.group(2).strip()
        if key == 'participants':
            # Participants are a comma-separated list of names.
            metadata[key] = [p.strip() for p in value.split(',')]
        else:
            metadata[key] = value

    # A comment block starts with a `---` line followed by `Name: <author>`
    # and runs until the next line starting with `---` or end of input.
    votes = {}
    comments = []
    comment_pattern = re.compile(
        r'^---\s*\n\s*Name:\s*(.+?)\n(.*?)(?=^---|\Z)',
        re.MULTILINE | re.DOTALL
    )
    for match in comment_pattern.finditer(content):
        author = match.group(1).strip()
        body = match.group(2).strip()

        # A vote must sit alone on its line; it is normalised to upper case.
        vote_match = vote_re.search(body)
        vote = vote_match.group(1).upper() if vote_match else None

        # Strip captured marker text: `(.+)$` would otherwise keep trailing
        # spaces and the `\r` of CRLF line endings.
        comment_questions = [
            m.group(1).strip() for m in question_re.finditer(body)
        ]
        comment_concerns = [
            m.group(1).strip() for m in concern_re.finditer(body)
        ]
        comment_decisions = [
            m.group(1).strip() for m in decision_re.finditer(body)
        ]
        # Sorted so the emitted JSON is stable across runs; plain set
        # iteration order for strings is not.
        comment_mentions = sorted(set(mention_re.findall(body)))

        if vote:
            # A later comment by the same author replaces the earlier vote.
            votes[author] = vote

        comments.append({
            "author": author,
            "body": body,
            "vote": vote,
            "mentions": comment_mentions,
            "markers": {
                "questions": comment_questions,
                "concerns": comment_concerns,
                "decisions": comment_decisions
            }
        })

    # Document-wide markers: scanned over the full text, so they include
    # markers that appear outside any comment block.
    questions = [m.group(1).strip() for m in question_re.finditer(content)]
    concerns = [m.group(1).strip() for m in concern_re.finditer(content)]
    decisions = [m.group(1).strip() for m in decision_re.finditer(content)]
    todos = [m.group(1).strip() for m in todo_re.finditer(content)]
    mentions = sorted(set(mention_re.findall(content)))

    # Vote summary: one vote per author; Counter yields 0 for absent keys.
    vote_counts = Counter(votes.values())

    parsed = json.dumps({
        "metadata": metadata,
        "comments": comments,
        "votes": votes,
        "vote_summary": {
            "READY": vote_counts["READY"],
            "CHANGES": vote_counts["CHANGES"],
            "REJECT": vote_counts["REJECT"],
            "total": len(votes)
        },
        "questions": questions,
        "concerns": concerns,
        "decisions": decisions,
        "todos": todos,
        "mentions": mentions
    }, indent=2)
  output_var: parsed
output: "{parsed}"