"""Build ``skills_index.json`` from the ``SKILL.md`` files under ``skills/``.

Each skill directory contributes one record whose fields come from the YAML
frontmatter of its ``SKILL.md``, with defaults derived from the directory
layout when the frontmatter does not provide them.
"""

import datetime
import json
import os
import re
import sys

import yaml

# Ensure UTF-8 output for Windows compatibility
if sys.platform == 'win32':
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')

# Leading "---" fence, lazily captured body, closing "---" fence.
_FRONTMATTER_RE = re.compile(r'^---\s*\n(.*?)\n---', re.DOTALL)
# "key: value" line; keys may contain dashes (e.g. 'package-name').
_KEY_VALUE_RE = re.compile(r'^(\s*[\w-]+):\s*(.*)$')
# Frontmatter keys copied verbatim over the directory-derived defaults.
_FRONTMATTER_FIELDS = ("name", "description", "risk", "source", "date_added")


def parse_frontmatter(content):
    """Parse the YAML frontmatter at the top of *content*.

    Unquoted values containing ``@`` are wrapped in double quotes before
    parsing, because a plain YAML scalar may not start with ``@``. The whole
    value is quoted, so comma-separated lists become a single string.

    Args:
        content: Full text of a markdown file.

    Returns:
        The frontmatter as a dict. An empty dict is returned when there is no
        frontmatter, when it is not valid YAML, or when it does not parse to
        a mapping.
    """
    fm_match = _FRONTMATTER_RE.search(content)
    if not fm_match:
        return {}

    sanitized_lines = []
    for line in fm_match.group(1).splitlines():
        match = _KEY_VALUE_RE.match(line)
        if match:
            key, val = match.groups()
            val_s = val.strip()
            if '@' in val_s and not val_s.startswith(('"', "'")):
                # Escape backslashes before quotes: inside a double-quoted
                # YAML scalar a bare backslash starts an escape sequence.
                safe_val = val_s.replace('\\', '\\\\').replace('"', '\\"')
                line = f'{key}: "{safe_val}"'
        sanitized_lines.append(line)

    try:
        metadata = yaml.safe_load('\n'.join(sanitized_lines))
    except yaml.YAMLError as e:
        print(f"⚠️ YAML parsing error: {e}")
        return {}

    # Frontmatter that parses to a list or scalar carries no usable keys.
    return metadata if isinstance(metadata, dict) else {}


def _json_safe(value):
    """Return *value* with date/datetime objects converted to ISO strings.

    ``yaml.safe_load`` turns unquoted ISO dates into ``datetime.date``
    objects, which ``json.dump`` cannot serialize.
    """
    if isinstance(value, datetime.date):  # datetime.datetime is a subclass
        return value.isoformat()
    return value


def _first_paragraph(content):
    """Return the first non-heading paragraph after the frontmatter.

    The result is joined onto one line and cut to 250 characters; it is an
    empty string when the body has no such paragraph.
    """
    body = content
    fm_match = _FRONTMATTER_RE.search(content)
    if fm_match:
        body = content[fm_match.end():].strip()

    desc_lines = []
    for line in body.split('\n'):
        if line.startswith('#') or not line.strip():
            # A heading or blank line ends the paragraph once one has begun.
            if desc_lines:
                break
            continue
        desc_lines.append(line.strip())
    return " ".join(desc_lines)[:250].strip()


def generate_index(skills_dir, output_file):
    """Walk *skills_dir* and write a JSON index of every skill found.

    A skill is any directory containing a ``SKILL.md`` file. Directories
    whose name starts with ``.`` are not descended into.

    Args:
        skills_dir: Root directory to scan.
        output_file: Path of the JSON file to write.

    Returns:
        The list of skill records, sorted by name and then id.
    """
    print(f"🏗️ Generating index from: {skills_dir}")
    skills = []

    for root, dirs, files in os.walk(skills_dir):
        # Skip .disabled or hidden directories (in place, so os.walk prunes)
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        if "SKILL.md" not in files:
            continue

        skill_path = os.path.join(root, "SKILL.md")
        dir_name = os.path.basename(root)
        parent_dir = os.path.basename(os.path.dirname(root))

        # Default values
        skill_info = {
            "id": dir_name,
            "path": os.path.relpath(root, os.path.dirname(skills_dir)),
            # Will be overridden by frontmatter if present
            "category": parent_dir if parent_dir != "skills" else None,
            "name": dir_name.replace("-", " ").title(),
            "description": "",
            "risk": "unknown",
            "source": "unknown",
            "date_added": None,
        }

        try:
            with open(skill_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"⚠️ Error reading {skill_path}: {e}")
            continue

        metadata = parse_frontmatter(content)

        # Merge metadata (frontmatter takes priority)
        for field in _FRONTMATTER_FIELDS:
            if field in metadata:
                skill_info[field] = _json_safe(metadata[field])

        # Category: prefer frontmatter, then folder structure, then default
        if "category" in metadata:
            skill_info["category"] = _json_safe(metadata["category"])
        elif skill_info["category"] is None:
            skill_info["category"] = "uncategorized"

        # Fallback for description if missing in frontmatter (legacy support)
        if not skill_info["description"]:
            fallback = _first_paragraph(content)
            if fallback:
                skill_info["description"] = fallback

        skills.append(skill_info)

    # Sort by name, then id. str() keeps a non-string frontmatter name
    # (e.g. a number or an empty value) from raising on .lower().
    skills.sort(key=lambda x: (str(x["name"]).lower(), x["id"].lower()))

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(skills, f, indent=2)

    print(f"✅ Generated rich index with {len(skills)} skills at: {output_file}")
    return skills


if __name__ == "__main__":
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    skills_path = os.path.join(base_dir, "skills")
    output_path = os.path.join(base_dir, "skills_index.json")
    generate_index(skills_path, output_path)