development-hub/src/development_hub/parsers/goals_parser.py

"""Parser for goals and milestones files."""

import re
from pathlib import Path

from development_hub.parsers.base import BaseParser, atomic_write
from development_hub.models.goal import (
    Goal,
    GoalList,
    Milestone,
    Deliverable,
    MilestoneStatus,
    DeliverableStatus,
)


class GoalsParser(BaseParser):
    """Parse project-level goals.md files."""

    def parse(self) -> GoalList:
        """Parse the goals.md body into a GoalList.

        Expected format:
        ---
        type: goals
        project: project-name
        ---

        ## Active
        - [ ] Goal description #high
        - [x] Completed goal #medium

        ## Future
        - [ ] Future goal

        ## Non-Goals
        - Not doing this

        A "## " header switches the current section when its text contains
        "active", "future", "non-goal" or "non goal" (case-insensitive).
        Any other "## " header is skipped and leaves the current section
        unchanged. Checkbox items are accepted in every section; plain
        "- " bullets are accepted only under Non-Goals.

        Returns:
            GoalList with parsed goals
        """
        goal_list = GoalList(
            project=self.frontmatter.get("project"),
            updated=self.frontmatter.get("updated"),
        )

        if not self.body:
            return goal_list

        # Items that appear before any recognized header are filed as Active.
        # NOTE(review): GoalsSaver keeps everything before the first goal
        # section verbatim and also rewrites the parsed goals, so a checkbox
        # inside that leading prose would be written twice on save -- confirm
        # whether such items should be skipped here.
        current_section = "active"

        for line in self.body.split("\n"):
            line_stripped = line.strip()

            # Detect section headers
            if line_stripped.startswith("## "):
                header = line_stripped[3:].strip().lower()
                if "active" in header:
                    current_section = "active"
                elif "future" in header:
                    current_section = "future"
                elif "non-goal" in header or "non goal" in header:
                    current_section = "non-goals"
                continue

            # Handle checkbox items (Active, Future, and Non-Goals)
            if line_stripped.startswith("- ["):
                goal = self._parse_goal_line(line_stripped)
                if goal:
                    if current_section == "active":
                        goal_list.active.append(goal)
                    elif current_section == "future":
                        goal_list.future.append(goal)
                    else:  # non-goals
                        goal_list.non_goals.append(goal)
            # Handle plain bullet items in non-goals section (backwards compatibility)
            elif current_section == "non-goals" and line_stripped.startswith("- "):
                text = line_stripped[2:].strip()
                if not text:
                    continue

                # The first high/medium/low hashtag sets the priority;
                # without one the goal defaults to medium.
                tags, text = self.extract_hashtags(text)
                priority = "medium"
                for tag in tags:
                    if tag in ("high", "medium", "low"):
                        priority = tag
                        # Keep tags free of the priority marker, matching
                        # what _parse_goal_line does for checkbox items.
                        tags.remove(tag)
                        break
                goal_list.non_goals.append(
                    Goal(text=text, completed=False, priority=priority, tags=tags)
                )

        return goal_list

    def _parse_goal_line(self, line: str) -> Goal | None:
        """Turn one checkbox bullet into a Goal.

        The checkbox yields the completed/partial flags, the first
        high/medium/low hashtag becomes the priority (default "medium") and
        is taken out of the tag list, and a date found in the remaining
        text becomes the completion date.

        Args:
            line: Stripped checkbox line

        Returns:
            The parsed Goal, or None when the checkbox carries no text
        """
        completed, partial, text = self.parse_checkbox_triple(line)
        if not text:
            return None

        tags, text = self.extract_hashtags(text)

        # Locate the first priority-level tag, if any
        level = next((t for t in tags if t in ("high", "medium", "low")), None)
        if level is None:
            level = "medium"
        else:
            # list.remove drops the first equal entry, i.e. the one just found
            tags.remove(level)

        date, text = self.extract_date(text)

        return Goal(
            text=text,
            completed=completed,
            partial=partial,
            priority=level,
            tags=tags,
            completed_date=date,
        )


class MilestonesParser(BaseParser):
    """Parse milestones.md file."""

    def parse(self) -> list[Milestone]:
        """Parse the milestones.md body into Milestone objects.

        Expected format (flat list, no section headers):
        #### M1: Milestone Name
        **Target**: January 2026
        **Status**: In Progress (80%)

        Description text here...

        ---

        #### M2: Another Milestone
        **Target**: February 2026
        **Status**: Completed (100%)

        Lines starting with "## " are dropped (legacy section headers), and
        anything before the first "#### M<id>: name" header is ignored.

        Returns:
            List of Milestone instances, in file order
        """
        parsed = []
        if not self.body:
            return parsed

        header_re = re.compile(r"####\s+(M[\d.]+):\s*(.+)")

        # Pass 1: cut the body into (id, name, raw content lines) blocks
        blocks = []
        for raw in self.body.split("\n"):
            text = raw.strip()
            if text.startswith("## "):
                continue

            header = header_re.match(text)
            if header is not None:
                blocks.append((header.group(1), header.group(2).strip(), []))
            elif blocks:
                blocks[-1][2].append(raw)

        # Pass 2: turn every block into a Milestone
        for mid, name, content_lines in blocks:
            milestone = self._parse_milestone_content(
                mid, name, "\n".join(content_lines)
            )
            if milestone:
                parsed.append(milestone)

        return parsed

    def _parse_milestone_content(
        self, mid: str, name: str, content: str
    ) -> Milestone | None:
        """Build a Milestone from the text that follows its header.

        Recognizes "**Target**:", "**Status**:" and "**Notes**:" field
        lines and a "|"-delimited deliverables table. "---" separators are
        skipped; every other non-empty line is collected into the
        description, joined with single spaces.

        Args:
            mid: Milestone ID (e.g., "M1")
            name: Milestone name
            content: Content after header

        Returns:
            Milestone instance
        """
        target, notes = "", ""
        status, progress = MilestoneStatus.NOT_STARTED, 0
        deliverables = []
        pending_rows = []  # table rows seen since the last non-table line
        prose = []

        for raw in content.split("\n"):
            text = raw.strip()

            if text == "---":
                continue

            if found := re.match(r"\*\*Target\*\*:\s*(.+)", text):
                target = found.group(1).strip()
            elif found := re.match(r"\*\*Status\*\*:\s*(.+)", text):
                status, progress = self._parse_status(found.group(1).strip())
            elif found := re.match(r"\*\*Notes\*\*:\s*(.+)", text):
                notes = found.group(1).strip()
            elif text.startswith("|"):
                pending_rows.append(text)
            else:
                # A non-table line closes any table that was being collected
                if pending_rows:
                    deliverables = self._parse_deliverables_table(pending_rows)
                    pending_rows = []
                if text:
                    prose.append(text)

        # Table that ran to the end of the content
        if pending_rows:
            deliverables = self._parse_deliverables_table(pending_rows)

        return Milestone(
            id=mid,
            name=name,
            target=target,
            status=status,
            progress=progress,
            deliverables=deliverables,
            notes=notes,
            description=" ".join(prose),
        )

    def _parse_status(self, status_text: str) -> tuple[MilestoneStatus, int]:
        """Interpret a status value such as 'In Progress (80%)'.

        Matching is a case-insensitive substring test, checked in the order
        "complete", "in progress", "planning". Complete always reports 100,
        an unrecognized status reports NOT_STARTED with 0, and the other two
        report the "(N%)" figure from the text, or 0 when it is absent.

        Returns:
            Tuple of (MilestoneStatus, progress percentage)
        """
        lowered = status_text.lower()

        if "complete" in lowered:
            return MilestoneStatus.COMPLETE, 100

        in_progress = "in progress" in lowered
        if not in_progress and "planning" not in lowered:
            return MilestoneStatus.NOT_STARTED, 0

        # Only these two states carry the written percentage through
        found = re.search(r"\((\d+)%\)", status_text)
        percent = int(found.group(1)) if found else 0

        if in_progress:
            return MilestoneStatus.IN_PROGRESS, percent
        return MilestoneStatus.PLANNING, percent

    def _parse_deliverables_table(self, lines: list[str]) -> list[Deliverable]:
        """Convert markdown table lines into Deliverable objects.

        Rows come from self.parse_table(); rows with fewer than two cells
        are skipped. The first cell is the name and the second cell picks
        the status: "done" wins over "progress", anything else is
        NOT_STARTED (case-insensitive substring match).

        Args:
            lines: Table lines

        Returns:
            List of Deliverable instances
        """

        def to_status(cell):
            lowered = cell.lower()
            if "done" in lowered:
                return DeliverableStatus.DONE
            if "progress" in lowered:
                return DeliverableStatus.IN_PROGRESS
            return DeliverableStatus.NOT_STARTED

        return [
            Deliverable(name=row[0], status=to_status(row[1]))
            for row in self.parse_table(lines)
            if len(row) >= 2
        ]

    def get_active_milestones(self) -> list[Milestone]:
        """Return the parsed milestones whose is_complete flag is falsy."""
        remaining = []
        for milestone in self.parse():
            if milestone.is_complete:
                continue
            remaining.append(milestone)
        return remaining

    def get_current_milestone(self) -> Milestone | None:
        """Get the milestone currently being worked on.

        Returns the first milestone whose status is IN_PROGRESS. If there
        is none, falls back to the first milestone that is not complete.

        Returns:
            The selected Milestone, or None when every milestone is
            complete or no milestones were parsed
        """
        # Parse once and reuse the result for both the primary search and
        # the fallback instead of re-parsing the body a second time.
        milestones = self.parse()

        for milestone in milestones:
            if milestone.status == MilestoneStatus.IN_PROGRESS:
                return milestone

        # Fall back to first non-complete
        return next((m for m in milestones if not m.is_complete), None)

    def save(self, milestones: list[Milestone]):
        """Write the milestones to self.file_path via atomic_write.

        The output is the frontmatter as "key: value" lines between "---"
        markers, a "# Milestones" title, then an "## Active" section with
        every milestone that is not COMPLETE followed by a "## Completed"
        section with the COMPLETE ones. Both section headers are written
        even when a section has no milestones.

        Args:
            milestones: List of Milestone instances to save
        """
        sections = (
            (
                "## Active",
                [m for m in milestones if m.status != MilestoneStatus.COMPLETE],
            ),
            (
                "## Completed",
                [m for m in milestones if m.status == MilestoneStatus.COMPLETE],
            ),
        )

        out = ["---"]
        out.extend(f"{key}: {value}" for key, value in self.frontmatter.items())
        out += ["---", "", "# Milestones", ""]

        for heading, group in sections:
            out += [heading, ""]
            for milestone in group:
                out += self._format_milestone(milestone)
                out.append("")  # blank line after each milestone block

        atomic_write(self.file_path, "\n".join(out))

    def _format_milestone(self, milestone: Milestone) -> list[str]:
        """Render one milestone as markdown lines.

        Layout: "#### id: name" header, optional Target, Status (with the
        value of milestone.calculate_progress() unless not started),
        optional Notes, optional description, optional deliverables table,
        and a closing "---" separator.

        Args:
            milestone: Milestone to format

        Returns:
            List of markdown lines
        """
        out = [f"#### {milestone.id}: {milestone.name}"]

        if milestone.target:
            out.append(f"**Target**: {milestone.target}")

        # Status line; the percentage comes from the milestone itself
        progress = milestone.calculate_progress()
        if milestone.status == MilestoneStatus.COMPLETE:
            status_line = f"**Status**: Completed ({progress}%)"
        elif milestone.status == MilestoneStatus.IN_PROGRESS:
            status_line = f"**Status**: In Progress ({progress}%)"
        elif milestone.status == MilestoneStatus.PLANNING:
            status_line = f"**Status**: Planning ({progress}%)"
        else:
            status_line = "**Status**: Not Started"
        out.append(status_line)

        if milestone.notes:
            out.append(f"**Notes**: {milestone.notes}")

        # Description sits between the fields and the table
        if milestone.description:
            out += ["", milestone.description]

        if milestone.deliverables:
            out += ["", "| Deliverable | Status |", "|-------------|--------|"]
            out += [
                f"| {d.name} | {d.status.value} |" for d in milestone.deliverables
            ]

        out += ["", "---"]
        return out


class GoalsSaver:
    """Save goals back to goals.md file.

    This saver preserves prose content (Vision, Principles, etc.) that appears
    before the goal sections. Only the ## Active, ## Future, and ## Non-Goals
    sections are rewritten with updated checkbox states.
    """

    def __init__(self, path: Path, frontmatter: dict):
        """Initialize saver.

        Args:
            path: Path to goals.md file
            frontmatter: Frontmatter dict to preserve
        """
        self.path = path
        self.frontmatter = frontmatter

    def save(self, goal_list: GoalList):
        """Save goals back to file, preserving prose content.

        Args:
            goal_list: GoalList to save
        """
        # Read existing file to preserve prose content
        existing_content = ""
        if self.path.exists():
            existing_content = self.path.read_text()

        # Extract prose content (everything before first goal section)
        prose_content = self._extract_prose_content(existing_content)

        lines = []

        # Write frontmatter
        lines.append("---")
        for key, value in self.frontmatter.items():
            lines.append(f"{key}: {value}")
        lines.append("---")
        lines.append("")

        # Add preserved prose content (without frontmatter, it's already added)
        if prose_content:
            lines.append(prose_content)
            # Ensure there's a blank line before goal sections
            if not prose_content.endswith("\n\n"):
                lines.append("")

        # Active goals
        lines.append("## Active")
        lines.append("")
        for goal in goal_list.active:
            checkbox = self._get_checkbox(goal)
            priority_tag = f" #{goal.priority}" if goal.priority else ""
            lines.append(f"- {checkbox} {goal.text}{priority_tag}")
        lines.append("")

        # Future goals
        if goal_list.future:
            lines.append("## Future")
            lines.append("")
            for goal in goal_list.future:
                checkbox = self._get_checkbox(goal)
                priority_tag = f" #{goal.priority}" if goal.priority else ""
                lines.append(f"- {checkbox} {goal.text}{priority_tag}")
            lines.append("")

        # Non-goals (also with checkboxes and priority)
        if goal_list.non_goals:
            lines.append("## Non-Goals")
            lines.append("")
            for goal in goal_list.non_goals:
                checkbox = self._get_checkbox(goal)
                priority_tag = f" #{goal.priority}" if goal.priority else ""
                lines.append(f"- {checkbox} {goal.text}{priority_tag}")
            lines.append("")

        atomic_write(self.path, "\n".join(lines))

    def _extract_prose_content(self, content: str) -> str:
        """Extract prose content before goal sections.

        Preserves everything between the frontmatter and the first goal section
        (## Active, ## Future, or ## Non-Goals).

        Args:
            content: Full file content

        Returns:
            Prose content without frontmatter, or empty string
        """
        if not content:
            return ""

        # Remove frontmatter
        lines = content.split("\n")
        start_idx = 0

        # Skip frontmatter (between --- markers)
        if lines and lines[0].strip() == "---":
            for i, line in enumerate(lines[1:], 1):
                if line.strip() == "---":
                    start_idx = i + 1
                    break

        # Find where goal sections start
        goal_section_headers = ["## active", "## future", "## non-goal", "## non goal"]
        end_idx = len(lines)

        for i, line in enumerate(lines[start_idx:], start_idx):
            line_lower = line.strip().lower()
            if any(line_lower.startswith(header) for header in goal_section_headers):
                end_idx = i
                break

        # Extract prose content
        prose_lines = lines[start_idx:end_idx]

        # Strip leading/trailing empty lines but preserve internal structure
        while prose_lines and not prose_lines[0].strip():
            prose_lines.pop(0)
        while prose_lines and not prose_lines[-1].strip():
            prose_lines.pop()

        return "\n".join(prose_lines)

    @staticmethod
    def _get_checkbox(goal: Goal) -> str:
        """Get the checkbox marker for a goal's state.

        Args:
            goal: Goal to get checkbox for

        Returns:
            "[x]" for completed, "[~]" for partial, "[ ]" for not achieved
        """
        if goal.completed:
            return "[x]"
        elif getattr(goal, 'partial', False):
            return "[~]"
        else:
            return "[ ]"