# discussion-parser - Extract structured data from discussion markdown
# Usage: cat discussion.md | discussion-parser | jq .

name: discussion-parser
description: Parse discussion markdown into structured JSON
category: Discussion

steps:
  # Single code step: parses the discussion markdown held in `input` and
  # stores the resulting JSON text in `parsed`.
  - type: code
    code: |
      import re
      import json
      from collections import Counter

      # NOTE(review): the script deliberately avoids helper functions and
      # comprehensions whose element expression reads outer variables, in
      # case the runner executes it with separate globals/locals (where such
      # lookups would fail) - confirm against the runner.

      # `input` is presumably the piped-in markdown text supplied by the
      # tool runner - verify against the runner.
      content = input

      # Marker patterns, compiled once and shared by the per-comment and the
      # whole-document scans so both always agree.
      # `[ \t]*` (instead of `\s*`) keeps the match on the marker's own line:
      # an empty marker such as "Q:" no longer captures the following line.
      # `\S.*` requires real text after the colon.
      marker_flags = re.MULTILINE | re.IGNORECASE
      question_re = re.compile(r'^(?:Q|QUESTION):[ \t]*(\S.*)$', marker_flags)
      concern_re = re.compile(r'^CONCERN:[ \t]*(\S.*)$', marker_flags)
      decision_re = re.compile(r'^DECISION:[ \t]*(\S.*)$', marker_flags)
      todo_re = re.compile(r'^(?:TODO|ACTION):[ \t]*(\S.*)$', marker_flags)
      # `(?<!\w)` skips the domain part of e-mail addresses (bob@example.com).
      mention_re = re.compile(r'(?<!\w)@(\w+)')

      # Parse metadata headers of the form <!-- key: value -->.
      # Keys are lower-cased; `participants` is split on commas into a list.
      metadata = {}
      for match in re.finditer(r'<!--\s*(\w+):\s*(.+?)\s*-->', content):
          key = match.group(1).lower()
          value = match.group(2).strip()
          if key == 'participants':
              metadata[key] = [p.strip() for p in value.split(',')]
          else:
              metadata[key] = value

      # Parse comment blocks and extract votes.
      # A comment block starts with a `---` line followed by `Name: <author>`
      # and runs until the next line starting with `---` or the end of input.
      votes = {}
      comments = []
      comment_pattern = re.compile(
          r'^---\s*\n\s*Name:\s*(.+?)\n(.*?)(?=^---|\Z)',
          re.MULTILINE | re.DOTALL
      )

      for match in comment_pattern.finditer(content):
          author = match.group(1).strip()
          body = match.group(2).strip()

          # Extract vote from body (first matching VOTE line wins within a
          # comment; the stored value is upper-cased).
          vote_match = re.search(
              r'^VOTE:\s*(READY|CHANGES|REJECT)\s*$',
              body,
              re.MULTILINE | re.IGNORECASE
          )
          vote = vote_match.group(1).upper() if vote_match else None

          # Extract markers from body. rstrip() drops trailing blanks and a
          # stray '\r' left over from CRLF line endings.
          comment_questions = [
              m.group(1).rstrip() for m in question_re.finditer(body)
          ]
          comment_concerns = [
              m.group(1).rstrip() for m in concern_re.finditer(body)
          ]
          comment_decisions = [
              m.group(1).rstrip() for m in decision_re.finditer(body)
          ]
          # De-duplicate while keeping first-occurrence order, so the output
          # is identical from run to run (a set has no stable order).
          comment_mentions = list(dict.fromkeys(mention_re.findall(body)))

          # A later vote by the same author replaces the earlier one.
          if vote:
              votes[author] = vote

          comments.append({
              "author": author,
              "body": body,
              "vote": vote,
              "mentions": comment_mentions,
              "markers": {
                  "questions": comment_questions,
                  "concerns": comment_concerns,
                  "decisions": comment_decisions
              }
          })

      # Extract global markers (scanned over the whole document, including
      # any text outside comment blocks).
      questions = [m.group(1).rstrip() for m in question_re.finditer(content)]
      concerns = [m.group(1).rstrip() for m in concern_re.finditer(content)]
      decisions = [m.group(1).rstrip() for m in decision_re.finditer(content)]
      todos = [m.group(1).rstrip() for m in todo_re.finditer(content)]
      mentions = list(dict.fromkeys(mention_re.findall(content)))

      # Vote summary: one vote per author (the latest), tallied by value.
      vote_counts = Counter(votes.values())

      parsed = json.dumps({
          "metadata": metadata,
          "comments": comments,
          "votes": votes,
          "vote_summary": {
              "READY": vote_counts.get("READY", 0),
              "CHANGES": vote_counts.get("CHANGES", 0),
              "REJECT": vote_counts.get("REJECT", 0),
              "total": len(votes)
          },
          "questions": questions,
          "concerns": concerns,
          "decisions": decisions,
          "todos": todos,
          "mentions": mentions
      }, indent=2)
    output_var: parsed

output: "{parsed}"