# discussion-parser - Extract structured data from discussion markdown
# Usage: cat discussion.md | discussion-parser | jq .

name: discussion-parser
description: Parse discussion markdown into structured JSON
category: Discussion

steps:
  - type: code
    code: |
      # Parses a discussion document into a JSON string stored in `parsed`.
      #
      # Output keys: metadata, comments, votes, vote_summary, questions,
      # concerns, decisions, todos, mentions.
      #
      # NOTE(review): this script is kept flat (no helper functions, and outer
      # names are only used in the first iterable of each comprehension) so it
      # keeps working if the runner exec()s it with separate globals/locals.
      import re
      import json
      from collections import Counter

      # NOTE(review): `input` is presumably the stdin text injected by the tool
      # runner (see Usage above), not the Python builtin - confirm.
      content = input

      # Patterns are compiled once and shared by the per-comment and global scans.
      LINE_FLAGS = re.MULTILINE | re.IGNORECASE

      # NOTE(review): the metadata pattern was empty in the previous revision,
      # which made match.group(1) raise IndexError on every run. Headers are
      # assumed to be HTML comments of the form `<!-- Key: value -->`; confirm
      # against the discussion file format.
      METADATA_RE = re.compile(r'<!--\s*(\w+):\s*(.+?)\s*-->')

      # A comment block starts with a `---` line followed by `Name: <author>`
      # and runs until the next line starting with `---` or the end of input.
      COMMENT_RE = re.compile(
          r'^---\s*\n\s*Name:\s*(.+?)\n(.*?)(?=^---|\Z)',
          re.MULTILINE | re.DOTALL
      )
      VOTE_RE = re.compile(r'^VOTE:\s*(READY|CHANGES|REJECT)\s*$', LINE_FLAGS)
      QUESTION_RE = re.compile(r'^(?:Q|QUESTION):\s*(.+)$', LINE_FLAGS)
      CONCERN_RE = re.compile(r'^CONCERN:\s*(.+)$', LINE_FLAGS)
      DECISION_RE = re.compile(r'^DECISION:\s*(.+)$', LINE_FLAGS)
      TODO_RE = re.compile(r'^(?:TODO|ACTION):\s*(.+)$', LINE_FLAGS)
      MENTION_RE = re.compile(r'@(\w+)')

      # Parse metadata headers; keys are lower-cased, and the `participants`
      # value is split on commas into a list of names.
      metadata = {}
      for match in METADATA_RE.finditer(content):
          key = match.group(1).lower()
          value = match.group(2).strip()
          if key == 'participants':
              metadata[key] = [p.strip() for p in value.split(',')]
          else:
              metadata[key] = value

      # Parse comment blocks and extract votes
      votes = {}
      comments = []
      for match in COMMENT_RE.finditer(content):
          author = match.group(1).strip()
          body = match.group(2).strip()

          # Only the first VOTE line in a comment body is considered.
          vote_match = VOTE_RE.search(body)
          vote = vote_match.group(1).upper() if vote_match else None

          # Extract markers from this comment's body only
          comment_questions = [m.group(1) for m in QUESTION_RE.finditer(body)]
          comment_concerns = [m.group(1) for m in CONCERN_RE.finditer(body)]
          comment_decisions = [m.group(1) for m in DECISION_RE.finditer(body)]

          # dict.fromkeys de-duplicates while keeping first-seen order, so the
          # JSON output is stable between runs (a plain set() is not).
          comment_mentions = list(dict.fromkeys(MENTION_RE.findall(body)))

          if vote:
              # A later vote by the same author replaces an earlier one.
              votes[author] = vote

          comments.append({
              "author": author,
              "body": body,
              "vote": vote,
              "mentions": comment_mentions,
              "markers": {
                  "questions": comment_questions,
                  "concerns": comment_concerns,
                  "decisions": comment_decisions
              }
          })

      # Extract global markers: these scan the whole document, including any
      # text outside of comment blocks.
      questions = [m.group(1) for m in QUESTION_RE.finditer(content)]
      concerns = [m.group(1) for m in CONCERN_RE.finditer(content)]
      decisions = [m.group(1) for m in DECISION_RE.finditer(content)]
      todos = [m.group(1) for m in TODO_RE.finditer(content)]
      mentions = list(dict.fromkeys(MENTION_RE.findall(content)))

      # Vote summary: one vote per author (the latest), tallied by value.
      vote_counts = Counter(votes.values())

      parsed = json.dumps({
          "metadata": metadata,
          "comments": comments,
          "votes": votes,
          "vote_summary": {
              "READY": vote_counts["READY"],
              "CHANGES": vote_counts["CHANGES"],
              "REJECT": vote_counts["REJECT"],
              "total": len(votes)
          },
          "questions": questions,
          "concerns": concerns,
          "decisions": decisions,
          "todos": todos,
          "mentions": mentions
      }, indent=2)
    output_var: parsed

output: "{parsed}"