111 lines
3.5 KiB
YAML
111 lines
3.5 KiB
YAML
# discussion-parser - Extract structured data from discussion markdown
|
|
# Usage: cat discussion.md | discussion-parser | jq .
|
|
|
|
name: discussion-parser
|
|
description: Parse discussion markdown into structured JSON
|
|
category: Discussion
|
|
|
|
steps:
|
|
- type: code
|
|
code: |
|
|
import re
|
|
import json
|
|
from collections import Counter
|
|
|
|
content = input
|
|
|
|
# Parse metadata headers
|
|
metadata = {}
|
|
for match in re.finditer(r'<!--\s*(\w+):\s*(.+?)\s*-->', content):
|
|
key = match.group(1).lower()
|
|
value = match.group(2).strip()
|
|
if key == 'participants':
|
|
metadata[key] = [p.strip() for p in value.split(',')]
|
|
else:
|
|
metadata[key] = value
|
|
|
|
# Parse comment blocks and extract votes
|
|
votes = {}
|
|
comments = []
|
|
comment_pattern = re.compile(
|
|
r'^---\s*\n\s*Name:\s*(.+?)\n(.*?)(?=^---|\Z)',
|
|
re.MULTILINE | re.DOTALL
|
|
)
|
|
|
|
for match in comment_pattern.finditer(content):
|
|
author = match.group(1).strip()
|
|
body = match.group(2).strip()
|
|
|
|
# Extract vote from body
|
|
vote_match = re.search(
|
|
r'^VOTE:\s*(READY|CHANGES|REJECT)\s*$',
|
|
body,
|
|
re.MULTILINE | re.IGNORECASE
|
|
)
|
|
vote = vote_match.group(1).upper() if vote_match else None
|
|
|
|
# Extract markers from body
|
|
comment_questions = [m.group(1) for m in re.finditer(
|
|
r'^(?:Q|QUESTION):\s*(.+)$', body, re.MULTILINE | re.IGNORECASE
|
|
)]
|
|
comment_concerns = [m.group(1) for m in re.finditer(
|
|
r'^CONCERN:\s*(.+)$', body, re.MULTILINE | re.IGNORECASE
|
|
)]
|
|
comment_decisions = [m.group(1) for m in re.finditer(
|
|
r'^DECISION:\s*(.+)$', body, re.MULTILINE | re.IGNORECASE
|
|
)]
|
|
comment_mentions = list(set(re.findall(r'@(\w+)', body)))
|
|
|
|
if vote:
|
|
votes[author] = vote
|
|
|
|
comments.append({
|
|
"author": author,
|
|
"body": body,
|
|
"vote": vote,
|
|
"mentions": comment_mentions,
|
|
"markers": {
|
|
"questions": comment_questions,
|
|
"concerns": comment_concerns,
|
|
"decisions": comment_decisions
|
|
}
|
|
})
|
|
|
|
# Extract global markers
|
|
questions = [m.group(1) for m in re.finditer(
|
|
r'^(?:Q|QUESTION):\s*(.+)$', content, re.MULTILINE | re.IGNORECASE
|
|
)]
|
|
concerns = [m.group(1) for m in re.finditer(
|
|
r'^CONCERN:\s*(.+)$', content, re.MULTILINE | re.IGNORECASE
|
|
)]
|
|
decisions = [m.group(1) for m in re.finditer(
|
|
r'^DECISION:\s*(.+)$', content, re.MULTILINE | re.IGNORECASE
|
|
)]
|
|
todos = [m.group(1) for m in re.finditer(
|
|
r'^(?:TODO|ACTION):\s*(.+)$', content, re.MULTILINE | re.IGNORECASE
|
|
)]
|
|
mentions = list(set(re.findall(r'@(\w+)', content)))
|
|
|
|
# Vote summary
|
|
vote_counts = Counter(votes.values())
|
|
|
|
parsed = json.dumps({
|
|
"metadata": metadata,
|
|
"comments": comments,
|
|
"votes": votes,
|
|
"vote_summary": {
|
|
"READY": vote_counts.get("READY", 0),
|
|
"CHANGES": vote_counts.get("CHANGES", 0),
|
|
"REJECT": vote_counts.get("REJECT", 0),
|
|
"total": len(votes)
|
|
},
|
|
"questions": questions,
|
|
"concerns": concerns,
|
|
"decisions": decisions,
|
|
"todos": todos,
|
|
"mentions": mentions
|
|
}, indent=2)
|
|
output_var: parsed
|
|
|
|
output: "{parsed}"
|