# CmdForge-Registry/.gitea/workflows/validate.yaml

# Workflow that checks tool submissions made through pull requests.
name: Validate Tool Submission

# Trigger: pull requests that change at least one path under tools/.
on:
  pull_request:
    paths:
      - 'tools/**'

jobs:
  # Single job; all checks below run as sequential steps on one runner.
  validate:
    runs-on: ubuntu-latest
    steps:
      # fetch-depth 0 requests the full history instead of a shallow clone;
      # the "Get changed files" step diffs against origin/<base branch>.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Version is quoted so YAML keeps it a string rather than a float.
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      # PyYAML provides the `yaml` module imported by the validation script.
      - name: Install dependencies
        run: pip install pyyaml

      # Writes the PR's changed paths under tools/ to the `files` step output
      # as a space-separated list (empty when nothing under tools/ changed).
      - name: Get changed files
        id: changed
        env:
          # Hand the base branch name to the script through the environment
          # instead of splicing the expression into the shell source, so an
          # unusual branch name cannot be interpreted as shell syntax.
          BASE_REF: ${{ github.base_ref }}
        run: |
          echo "files=$(git diff --name-only "origin/${BASE_REF}...HEAD" | grep '^tools/' | tr '\n' ' ')" >> "$GITHUB_OUTPUT"

      # Validates every tools/<owner>/<tool>/config.yaml present in the
      # checkout (not only the files changed by the PR). Problems are collected
      # as errors or warnings; the step fails only if at least one error was
      # recorded.
      - name: Validate tool configs
        run: |
          python3 << 'EOF'
          import sys
          import yaml
          import re
          from pathlib import Path

          SEMVER_RE = re.compile(r'^\d+\.\d+\.\d+(-[0-9A-Za-z.-]+)?(\+.+)?$')
          TOOL_NAME_RE = re.compile(r'^[A-Za-z0-9-]{1,64}$')
          OWNER_RE = re.compile(r'^[a-z0-9][a-z0-9-]{0,37}[a-z0-9]$|^[a-z0-9]$')
          VALID_CATEGORIES = {
              'text-processing', 'code', 'data', 'media', 'productivity',
              'writing', 'translation', 'research', 'summarization'
          }

          errors = []
          warnings = []

          # Find all config.yaml files in tools/ (sorted for a stable report order)
          for config_path in sorted(Path('tools').glob('*/*/config.yaml')):
              owner = config_path.parent.parent.name
              tool_name = config_path.parent.name

              print(f"Validating {owner}/{tool_name}...")

              # Remember how many errors existed so the per-tool summary line
              # can tell whether this tool added any.
              errors_before = len(errors)

              # Validate owner slug
              if not OWNER_RE.match(owner):
                  errors.append(f"{config_path}: Invalid owner slug '{owner}'")
                  continue

              # Validate tool name
              if not TOOL_NAME_RE.match(tool_name):
                  errors.append(f"{config_path}: Invalid tool name '{tool_name}'")
                  continue

              # Parse YAML; safe_load never constructs arbitrary Python objects.
              # A file that is not valid UTF-8 is reported like a syntax error
              # instead of aborting the whole run with a traceback.
              try:
                  with open(config_path, encoding='utf-8') as f:
                      config = yaml.safe_load(f)
              except (yaml.YAMLError, UnicodeDecodeError) as e:
                  errors.append(f"{config_path}: Invalid YAML - {e}")
                  continue

              if not config:
                  errors.append(f"{config_path}: Empty config file")
                  continue

              # safe_load can return a list or a scalar; every check below
              # needs key lookups, so anything but a mapping is rejected here.
              if not isinstance(config, dict):
                  errors.append(f"{config_path}: Top-level YAML must be a mapping")
                  continue

              # Required fields
              if not config.get('name'):
                  errors.append(f"{config_path}: Missing 'name' field")
              elif config['name'] != tool_name:
                  errors.append(f"{config_path}: name '{config['name']}' doesn't match directory '{tool_name}'")

              # str() because an unquoted YAML version such as 1.0 loads as a float
              if not config.get('version'):
                  errors.append(f"{config_path}: Missing 'version' field")
              elif not SEMVER_RE.match(str(config['version'])):
                  errors.append(f"{config_path}: Invalid version '{config['version']}' (must be semver)")

              description = config.get('description')
              if not description:
                  warnings.append(f"{config_path}: Missing 'description' field")
              elif not isinstance(description, str):
                  # len() on a number would raise; on a list it would count items
                  errors.append(f"{config_path}: 'description' must be a string")
              elif len(description) > 500:
                  errors.append(f"{config_path}: Description too long (max 500 chars)")

              # Category validation; the isinstance test keeps unhashable values
              # (lists, mappings) from raising on the set membership check.
              category = config.get('category', '')
              if category and (not isinstance(category, str) or category not in VALID_CATEGORIES):
                  warnings.append(f"{config_path}: Unknown category '{category}'")

              # Tags validation
              tags = config.get('tags', [])
              if not isinstance(tags, list):
                  errors.append(f"{config_path}: 'tags' must be a list")
              elif len(tags) > 10:
                  errors.append(f"{config_path}: Too many tags (max 10)")

              # Steps validation
              steps = config.get('steps', [])
              if not steps:
                  errors.append(f"{config_path}: Missing 'steps' field")
              elif not isinstance(steps, list):
                  errors.append(f"{config_path}: 'steps' must be a list")
              else:
                  for i, step in enumerate(steps):
                      if not isinstance(step, dict):
                          errors.append(f"{config_path}: Step {i} must be a mapping")
                          continue
                      # Only prompt steps have required fields checked here
                      if step.get('type') == 'prompt':
                          if not step.get('prompt'):
                              errors.append(f"{config_path}: Step {i} missing 'prompt' field")
                          if not step.get('output_var'):
                              errors.append(f"{config_path}: Step {i} missing 'output_var' field")

              # Check README exists
              readme_path = config_path.parent / 'README.md'
              if not readme_path.exists():
                  warnings.append(f"{config_path}: No README.md found")

              # Report success only when this tool contributed no errors
              if len(errors) == errors_before:
                  print(f"  ✓ {owner}/{tool_name} validated")
              else:
                  print(f"  ✗ {owner}/{tool_name} has errors")

          # Print results
          if warnings:
              print("\n⚠️  Warnings:")
              for w in warnings:
                  print(f"  - {w}")

          if errors:
              print("\n❌ Errors:")
              for e in errors:
                  print(f"  - {e}")
              sys.exit(1)

          print("\n✅ All tools validated successfully!")
          EOF

      # Scans the raw text of every tools/<owner>/<tool>/config.yaml for
      # strings that look like credentials and fails the step on any match.
      - name: Check for secrets in prompts
        run: |
          python3 << 'EOF'
          import re
          import sys
          from pathlib import Path

          # (compiled pattern, label used in the report); compiled once here
          # rather than once per file.
          SECRET_PATTERNS = [
              (re.compile(r'(?i)(api[_-]?key|apikey)\s*[:=]\s*["\'][^"\']+["\']'), 'API key'),
              (re.compile(r'(?i)(secret|password|token)\s*[:=]\s*["\'][^"\']+["\']'), 'secret/password'),
              (re.compile(r'sk-[a-zA-Z0-9]{20,}'), 'OpenAI API key'),
              (re.compile(r'ghp_[a-zA-Z0-9]{36}'), 'GitHub token'),
          ]

          issues = []
          # Sorted so the report order does not depend on the filesystem
          for config_path in sorted(Path('tools').glob('*/*/config.yaml')):
              # errors='replace' lets the scan continue over files containing
              # bytes that are not valid UTF-8 instead of raising.
              content = config_path.read_text(encoding='utf-8', errors='replace')
              for pattern, name in SECRET_PATTERNS:
                  if pattern.search(content):
                      issues.append(f"{config_path}: Possible {name} found in config")

          if issues:
              print("❌ Security issues found:")
              for issue in issues:
                  print(f"  - {issue}")
              # sys.exit rather than the site-provided exit(), which is meant
              # for interactive sessions.
              sys.exit(1)

          print("✅ No secrets detected in configs")
          EOF