"""Build a JSON index of skills by scanning a directory tree for SKILL.md files.

For every directory that contains a ``SKILL.md`` the script reads the YAML
frontmatter, derives a category (frontmatter, then parent folder, then keyword
inference, with curated per-skill overrides applied last), attaches plugin
compatibility data and writes the sorted result to a JSON file.
"""
import os
import json
import pathlib
import re
import sys
from collections.abc import Mapping
from datetime import date, datetime

import yaml
from _project_paths import find_repo_root
from plugin_compatibility import build_report as build_plugin_compatibility_report
from plugin_compatibility import compatibility_by_path as plugin_compatibility_by_path

# Ensure UTF-8 output for Windows compatibility
if sys.platform == 'win32':
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')

# Keyword rules used by infer_category(). Each rule contributes a score per
# keyword hit; "strong_keywords" (optional) score one point higher.
# NOTE: keywords are scored per list entry, so an entry listed twice
# (e.g. "azure" under "cloud") counts twice.
CATEGORY_RULES = [
    {
        "name": "security",
        "keywords": [
            "security", "auth", "authentication", "authorization", "oauth", "jwt",
            "cryptography", "encryption", "vulnerability", "threat", "pentest",
            "xss", "sqli", "gdpr", "pci", "compliance",
        ],
    },
    {
        "name": "testing",
        "keywords": [
            "test", "testing", "tdd", "qa", "e2e", "playwright", "cypress",
            "pytest", "jest", "benchmark", "evaluation", "end to end",
        ],
        "strong_keywords": ["playwright", "cypress", "pytest", "jest", "e2e", "end to end"],
    },
    {
        "name": "automation",
        "keywords": [
            "automation", "workflow", "trigger", "integration", "slack",
            "airtable", "calendar", "gmail", "google", "hubspot", "notion",
            "zendesk", "stripe", "shopify", "sendgrid", "clickup", "n8n",
            "zapier", "make", "zoom",
        ],
    },
    {
        "name": "devops",
        "keywords": [
            "docker", "kubernetes", "k8s", "helm", "terraform", "deploy",
            "deployment", "cicd", "gitops", "observability", "monitoring",
            "grafana", "prometheus", "incident", "sre", "tracing",
        ],
    },
    {
        "name": "cloud",
        "keywords": [
            "aws", "azure", "gcp", "cloud", "serverless", "lambda", "storage",
            "functions", "cdn", "azure", "azd",
        ],
    },
    {
        "name": "database",
        "keywords": [
            "database", "sql", "postgres", "postgresql", "mysql", "mongodb",
            "redis", "orm", "schema", "migration", "query", "prisma",
        ],
    },
    {
        "name": "ai-ml",
        "keywords": [
            "ai", "ml", "llm", "agent", "agents", "gpt", "embedding", "vector",
            "rag", "prompt", "model", "training", "inference", "pytorch",
            "tensorflow", "hugging", "openai",
        ],
    },
    {
        "name": "mobile",
        "keywords": [
            "mobile", "android", "ios", "swift", "swiftui", "kotlin", "flutter",
            "expo", "react native", "app store", "play store", "jetpack compose",
        ],
    },
    {
        "name": "game-development",
        "keywords": [
            "game", "unity", "unreal", "godot", "threejs", "3d", "2d", "shader",
            "rendering", "webgl", "physics",
        ],
    },
    {
        "name": "web-development",
        "keywords": [
            "web", "frontend", "react", "nextjs", "vue", "angular", "svelte",
            "tailwind", "css", "html", "browser", "extension", "component",
            "ui", "ux", "javascript", "typescript",
        ],
    },
    {
        "name": "backend",
        "keywords": [
            "backend", "api", "fastapi", "django", "flask", "express", "node",
            "server", "middleware", "graphql", "rest",
        ],
    },
    {
        "name": "data-science",
        "keywords": [
            "data", "analytics", "pandas", "numpy", "statistics", "matplotlib",
            "plotly", "seaborn", "scipy", "notebook",
        ],
    },
    {
        "name": "content",
        "keywords": [
            "content", "copy", "copywriting", "writing", "documentation",
            "transcription", "transcribe", "seo", "blog", "markdown",
        ],
    },
    {
        "name": "business",
        "keywords": [
            "business", "product", "market", "sales", "finance", "startup",
            "legal", "customer", "competitive", "pricing", "kpi",
        ],
    },
    {
        "name": "architecture",
        "keywords": [
            "architecture", "adr", "microservices", "ddd", "domain", "cqrs",
            "saga", "patterns",
        ],
    },
]

# (skill-id prefix, category) pairs checked in order by infer_category();
# the first prefix that matches wins.
FAMILY_CATEGORY_RULES = [
    ("azure-", "cloud"),
    ("aws-", "cloud"),
    ("gcp-", "cloud"),
    ("apify-", "automation"),
    ("google-", "automation"),
    ("n8n-", "automation"),
    ("makepad-", "development"),
    ("robius-", "development"),
    ("avalonia-", "development"),
    ("hig-", "development"),
    ("fp-", "development"),
    ("fp-ts-", "development"),
    ("threejs-", "web-development"),
    ("react-", "web-development"),
    ("vue-", "web-development"),
    ("angular-", "web-development"),
    ("browser-", "web-development"),
    ("expo-", "mobile"),
    ("swiftui-", "mobile"),
    ("android-", "mobile"),
    ("ios-", "mobile"),
    ("hugging-face-", "ai-ml"),
    ("agent-", "ai-ml"),
    ("agents-", "ai-ml"),
    ("ai-", "ai-ml"),
    ("claude-", "ai-ml"),
    ("context-", "ai-ml"),
    ("fal-", "ai-ml"),
    ("yann-", "ai-ml"),
    ("llm-", "ai-ml"),
    ("rag-", "ai-ml"),
    ("embedding-", "ai-ml"),
    ("odoo-", "business"),
    ("product-", "business"),
    ("data-", "data-science"),
    ("wiki-", "content"),
    ("documentation-", "content"),
    ("copy", "content"),
    ("audio-", "content"),
    ("video-", "content"),
    ("api-", "backend"),
    ("django-", "backend"),
    ("fastapi-", "backend"),
    ("backend-", "backend"),
    ("python-", "development"),
    ("bash-", "development"),
    ("code-", "development"),
    ("codebase-", "development"),
    ("error-", "development"),
    ("framework-", "development"),
    ("debugging-", "development"),
    ("javascript-", "development"),
    ("go-", "development"),
    ("performance-", "development"),
    ("dbos-", "development"),
    ("conductor-", "workflow"),
    ("workflow-", "workflow"),
    ("create-", "workflow"),
    ("git-", "workflow"),
    ("github-", "workflow"),
    ("gitlab-", "workflow"),
    ("skill-", "meta"),
    ("cc-skill-", "meta"),
    ("tdd-", "testing"),
    ("test-", "testing"),
    ("security-", "security"),
    ("database-", "database"),
    ("c4-", "architecture"),
    ("deployment-", "devops"),
    ("incident-", "devops"),
    ("terraform-", "devops"),
]

# Exact skill id -> category. Applied in generate_index() after every other
# category source, so an entry here always wins.
CURATED_CATEGORY_OVERRIDES = {
    "ai-agents-architect": "ai-agents",
    "agent-evaluation": "ai-agents",
    "agent-manager-skill": "ai-agents",
    "langgraph": "ai-agents",
    "multi-agent-patterns": "ai-agents",
    "pydantic-ai": "ai-agents",
    "plaid-fintech": "api-integration",
    "stripe-integration": "api-integration",
    "paypal-integration": "api-integration",
    "hubspot-integration": "api-integration",
    "twilio-communications": "api-integration",
    "pakistan-payments-stack": "api-integration",
    "javascript-typescript-typescript-scaffold": "app-builder",
    "fastapi-templates": "app-builder",
    "frontend-mobile-development-component-scaffold": "app-builder",
    "templates": "app-builder",
    "blockchain-developer": "blockchain",
    "crypto-bd-agent": "blockchain",
    "defi-protocol-templates": "blockchain",
    "goldrush-api": "blockchain",
    "lightning-architecture-review": "blockchain",
    "lightning-channel-factories": "blockchain",
    "lightning-factory-explainer": "blockchain",
    "web3-testing": "blockchain",
    "javascript-pro": "code",
    "python-pro": "code",
    "typescript-pro": "code",
    "golang-pro": "code",
    "rust-pro": "code",
    "uncle-bob-craft": "code-quality",
    "clean-code": "code-quality",
    "kaizen": "code-quality",
    "code-review-checklist": "code-quality",
    "codebase-cleanup-tech-debt": "code-quality",
    "code-refactoring-refactor-clean": "code-quality",
    "comprehensive-review-full-review": "code-quality",
    "comprehensive-review-pr-enhance": "code-quality",
    "data-engineer": "data",
    "dbt-transformation-patterns": "data",
    "analytics-tracking": "data",
    "sql-pro": "data",
    "web-scraper": "data",
    "x-twitter-scraper": "data",
    "ai-engineering-toolkit": "data-ai",
    "embedding-strategies": "data-ai",
    "llm-app-patterns": "data-ai",
    "local-llm-expert": "data-ai",
    "rag-engineer": "data-ai",
    "seek-and-analyze-video": "data-ai",
    "vector-database-engineer": "data-ai",
    "database-admin": "database-processing",
    "database-architect": "database-processing",
    "database-design": "database-processing",
    "database-optimizer": "database-processing",
    "base": "database-processing",
    "using-neon": "database-processing",
    "bug-hunter": "development-and-testing",
    "debugging-strategies": "development-and-testing",
    "openclaw-github-repo-commander": "development-and-testing",
    "systematic-debugging": "development-and-testing",
    "test-fixing": "development-and-testing",
    "antigravity-design-expert": "design",
    "design-md": "design",
    "design-orchestration": "design",
    "design-spells": "design",
    "stitch-ui-design": "design",
    "web-design-guidelines": "design",
    "docx-official": "document-processing",
    "doc-coauthoring": "document-processing",
    "pdf": "document-processing",
    "pdf-official": "document-processing",
    "writer": "document-processing",
    "landing-page-generator": "front-end",
    "frontend-design": "front-end",
    "frontend-developer": "front-end",
    "frontend-dev-guidelines": "front-end",
    "ui-ux-pro-max": "front-end",
    "astro": "frontend",
    "nextjs-best-practices": "frontend",
    "react-patterns": "frontend",
    "sveltekit": "frontend",
    "tailwind-patterns": "frontend",
    "django-pro": "framework",
    "fastapi-pro": "framework",
    "nestjs-expert": "framework",
    "nextjs-app-router-patterns": "framework",
    "trpc-fullstack": "framework",
    "typescript-expert": "framework",
    "algorithmic-art": "graphics-processing",
    "canvas-design": "graphics-processing",
    "draw": "graphics-processing",
    "image-studio": "graphics-processing",
    "imagen": "graphics-processing",
    "laravel-expert": "framework",
    "laravel-security-audit": "security",
    "advogado-criminal": "legal",
    "advogado-especialista": "legal",
    "customs-trade-compliance": "legal",
    "employment-contract-templates": "legal",
    "legal-advisor": "legal",
    "lex": "legal",
    "app-store-optimization": "marketing",
    "brand-guidelines": "marketing",
    "brand-guidelines-anthropic": "marketing",
    "brand-guidelines-community": "marketing",
    "content-creator": "marketing",
    "copy-editing": "marketing",
    "copywriting": "marketing",
    "email-sequence": "marketing",
    "free-tool-strategy": "marketing",
    "growth-engine": "marketing",
    "instagram": "marketing",
    "instagram-automation": "marketing",
    "launch-strategy": "marketing",
    "linkedin-automation": "marketing",
    "linkedin-cli": "marketing",
    "marketing-ideas": "marketing",
    "marketing-psychology": "marketing",
    "programmatic-seo": "marketing",
    "social-content": "marketing",
    "social-orchestrator": "marketing",
    "remotion-best-practices": "media",
    "sora": "media",
    "videodb": "media",
    "videodb-skills": "media",
    "agent-memory-systems": "memory",
    "context-window-management": "memory",
    "conversation-memory": "memory",
    "hierarchical-agent-memory": "memory",
    "memory-systems": "memory",
    "recallmax": "memory",
    "memory-forensics": "security",
    "memory-safety-patterns": "development",
    "m365-agents-dotnet": "ai-agents",
    "m365-agents-ts": "ai-agents",
    "hosted-agents": "ai-agents",
    "hosted-agents-v2-py": "ai-agents",
    "multi-advisor": "ai-agents",
    "multi-platform-apps-multi-platform": "development",
    "mobile-design": "mobile",
    "mobile-security-coder": "mobile",
    "blueprint": "planning",
    "concise-planning": "planning",
    "planning-with-files": "planning",
    "track-management": "planning",
    "google-slides-automation": "presentation-processing",
    "frontend-slides": "presentation-processing",
    "impress": "presentation-processing",
    "pptx-official": "presentation-processing",
    "file-organizer": "productivity",
    "google-calendar-automation": "productivity",
    "interview-coach": "productivity",
    "office-productivity": "productivity",
    "risk-manager": "business",
    "risk-metrics-calculation": "business",
    "github-issue-creator": "project-management",
    "linear-claude-skill": "project-management",
    "progressive-estimation": "project-management",
    "team-collaboration-issue": "project-management",
    "team-collaboration-standup-notes": "project-management",
    "freshservice-automation": "project-management",
    "wrike-automation": "project-management",
    "distributed-debugging-debug-trace": "reliability",
    "distributed-tracing": "reliability",
    "incident-responder": "reliability",
    "observability-engineer": "reliability",
    "postmortem-writing": "reliability",
    "slo-implementation": "reliability",
    "tool-use-guardian": "reliability",
    "calc": "spreadsheet-processing",
    "google-sheets-automation": "spreadsheet-processing",
    "googlesheets-automation": "spreadsheet-processing",
    "xlsx-official": "spreadsheet-processing",
    "awt-e2e-testing": "test-automation",
    "browser-automation": "test-automation",
    "e2e-testing-patterns": "test-automation",
    "go-playwright": "test-automation",
    "playwright-java": "test-automation",
    "playwright-skill": "test-automation",
    "test-automator": "test-automation",
    "webapp-testing": "test-automation",
    "ffuf-claude-skill": "security",
    "ffuf-web-fuzzing": "security",
    "file-path-traversal": "security",
    "file-uploads": "security",
    "semgrep-rule-creator": "security",
    "semgrep-rule-variant-creator": "security",
    "seo-audit": "content",
    "seo-forensic-incident-response": "content",
    "fixing-accessibility": "front-end",
    "fixing-metadata": "front-end",
    "fixing-motion-performance": "front-end",
    "internal-comms-anthropic": "content",
    "internal-comms-community": "content",
    "leiloeiro-avaliacao": "leiloeiro",
    "leiloeiro-edital": "leiloeiro",
    "leiloeiro-ia": "leiloeiro",
    "leiloeiro-juridico": "leiloeiro",
    "leiloeiro-mercado": "leiloeiro",
    "leiloeiro-risco": "leiloeiro",
    "linux-privilege-escalation": "security",
    "linux-shell-scripting": "development",
    "mcp-builder": "ai-agents",
    "mcp-builder-ms": "ai-agents",
    "monorepo-architect": "development",
    "monorepo-management": "development",
    "pentest-checklist": "security",
    "pentest-commands": "security",
    "salesforce-automation": "api-integration",
    "salesforce-development": "api-integration",
    "segment-automation": "data",
    "segment-cdp": "data",
    "senior-architect": "development",
    "senior-fullstack": "development",
    "shopify-apps": "api-integration",
    "shopify-development": "api-integration",
    "sred-project-organizer": "project-management",
    "sred-work-summary": "project-management",
    "startup-business-analyst-financial-projections": "business",
    "startup-financial-modeling": "business",
    "telegram-automation": "api-integration",
    "telegram-bot-builder": "api-integration",
    "temporal-golang-pro": "workflow",
    "temporal-python-pro": "workflow",
    "using-git-worktrees": "development",
    "using-superpowers": "meta",
    "varlock": "security",
    "varlock-claude-skill": "security",
    "vexor": "development",
    "vexor-cli": "development",
    "audio-transcriber": "voice-agents",
    "fal-audio": "voice-agents",
    "pipecat-friday-agent": "voice-agents",
    "3d-web-experience": "design",
    "ab-test-setup": "marketing",
    "acceptance-orchestrator": "workflow",
    "accessibility-compliance-accessibility-audit": "design",
    "active-directory-attacks": "security",
    "activecampaign-automation": "marketing",
    "alpha-vantage": "data",
    "amplitude-automation": "data",
    "analytics-product": "data",
    "analyze-project": "meta",
    "antigravity-workflows": "workflow",
    "anti-reversing-techniques": "security",
    "arm-cortex-expert": "development",
    "asana-automation": "project-management",
    "ask-questions-if-underspecified": "workflow",
    "audit-context-building": "meta",
    "basecamp-automation": "project-management",
    "bazel-build-optimization": "development",
    "behavioral-modes": "meta",
    "bitbucket-automation": "workflow",
    "blog-writing-guide": "content",
    "box-automation": "productivity",
    "brevo-automation": "marketing",
    "broken-authentication": "security",
    "building-native-ui": "mobile",
    "bullmq-specialist": "framework",
    "burp-suite-testing": "security",
    "business-analyst": "business",
    "busybox-on-windows": "development",
    "c-pro": "code",
    "cal-com-automation": "productivity",
    "calendly-automation": "productivity",
    "canva-automation": "design",
    "carrier-relationship-management": "business",
    "changelog-automation": "workflow",
    "cloudflare-workers-expert": "framework",
    "closed-loop-delivery": "workflow",
    "commit": "workflow",
    "confluence-automation": "project-management",
    "constant-time-analysis": "security",
    "context7-auto-research": "meta",
    "convex": "framework",
    "convertkit-automation": "marketing",
    "cpp-pro": "code",
    "cred-omega": "security",
    "csharp-pro": "code",
    "datadog-automation": "reliability",
    "dependency-upgrade": "development",
    "differential-review": "security",
    "discord-automation": "api-integration",
    "docusign-automation": "productivity",
    "dotnet-architect": "development",
    "dropbox-automation": "productivity",
    "dx-optimizer": "development",
    "elixir-pro": "code",
    "electron-development": "development",
    "energy-procurement": "business",
    "environment-setup-guide": "development",
    "ethical-hacking-methodology": "security",
    "executing-plans": "workflow",
    "fda-food-safety-auditor": "legal",
    "fda-medtech-compliance-auditor": "legal",
    "figma-automation": "design",
    "filesystem-context": "meta",
    "flutter-expert": "mobile",
    "gha-security-review": "security",
    "gh-review-requests": "workflow",
    "gmail-automation": "productivity",
    "haskell-pro": "code",
    "hr-pro": "business",
    "inngest": "workflow",
    "inventory-demand-planning": "business",
    "iterate-pr": "workflow",
    "java-pro": "code",
    "jira-automation": "project-management",
    "klaviyo-automation": "marketing",
    "linear-automation": "project-management",
    "mailchimp-automation": "marketing",
    "microsoft-teams-automation": "api-integration",
    "miro-automation": "project-management",
    "mixpanel-automation": "data",
    "ml-pipeline-workflow": "workflow",
    "monday-automation": "project-management",
    "on-call-handoff-patterns": "reliability",
    "one-drive-automation": "productivity",
    "pagerduty-automation": "reliability",
    "php-pro": "code",
    "pipedrive-automation": "business",
    "plan-writing": "planning",
    "postmark-automation": "api-integration",
    "posthog-automation": "data",
    "pr-writer": "workflow",
    "privacy-by-design": "security",
    "receiving-code-review": "workflow",
    "reddit-automation": "marketing",
    "requesting-code-review": "workflow",
    "ruby-pro": "code",
    "scala-pro": "code",
    "sentry-automation": "reliability",
    "service-mesh-expert": "reliability",
    "shadcn": "framework",
    "square-automation": "api-integration",
    "subagent-driven-development": "workflow",
    "tanstack-query-expert": "framework",
    "tiktok-automation": "marketing",
    "todoist-automation": "project-management",
    "trello-automation": "project-management",
    "trigger-dev": "workflow",
    "twitter-automation": "marketing",
    "ui-visual-validator": "design",
    "unreal-engine-cpp-pro": "code",
    "uv-package-manager": "development",
    "webflow-automation": "design",
    "whatsapp-automation": "api-integration",
    "writing-plans": "planning",
    "youtube-automation": "marketing",
    "zod-validation-expert": "framework",
    "zoho-crm-automation": "business",
    "address-github-comments": "workflow",
    "airflow-dag-patterns": "workflow",
    "algolia-search": "api-integration",
    "android_ui_verification": "test-automation",
    "application-performance-performance-optimization": "reliability",
    "architect-review": "architecture",
    "astropy": "science",
    "async-python-patterns": "development",
    "auri-core": "voice-agents",
    "binary-analysis-patterns": "security",
    "biopython": "science",
    "build": "workflow",
    "burpsuite-project-parser": "security",
    "cdk-patterns": "cloud",
    "chat-widget": "front-end",
    "chrome-extension-developer": "front-end",
    "cirq": "science",
    "citation-management": "content",
    "cloudformation-best-practices": "cloud",
    "computer-vision-expert": "ai-ml",
    "cqrs-implementation": "architecture",
    "ddd-strategic-design": "architecture",
    "deep-research": "ai-ml",
    "dispatching-parallel-agents": "ai-agents",
    "emergency-card": "health",
    "evaluation": "ai-ml",
    "event-store-design": "architecture",
    "exa-search": "data-ai",
    "explain-like-socrates": "content",
    "family-health-analyzer": "health",
    "find-bugs": "code-quality",
    "finishing-a-development-branch": "workflow",
    "firebase": "cloud",
    "firmware-analyst": "security",
    "fitness-analyzer": "health",
    "fix-review": "code-quality",
    "food-database-query": "health",
    "freshdesk-automation": "automation",
    "form-cro": "marketing",
    "full-stack-orchestration-full-stack-feature": "workflow",
    "game-development": "game-development",
    "gdpr-data-handling": "security",
    "gemini-api-dev": "ai-ml",
    "geo-fundamentals": "marketing",
    "goal-analyzer": "health",
    "graphql-architect": "architecture",
    "health-trend-analyzer": "health",
    "helpdesk-automation": "automation",
    "html-injection-testing": "security",
    "hybrid-cloud-networking": "cloud",
    "i18n-localization": "development",
    "idor-testing": "security",
    "interactive-portfolio": "front-end",
    "intercom-automation": "automation",
    "issues": "workflow",
    "keyword-extractor": "marketing",
    "legacy-modernizer": "development",
    "lint-and-validate": "workflow",
    "local-legal-seo-audit": "marketing",
    "malware-analyst": "security",
    "mental-health-analyzer": "health",
    "metasploit-framework": "security",
    "micro-saas-launcher": "business",
    "modern-javascript-patterns": "development",
    "monetization": "business",
    "mtls-configuration": "security",
    "native-data-fetching": "development",
    "networkx": "science",
    "notion-template-business": "business",
    "nutrition-analyzer": "health",
    "nx-workspace-patterns": "development",
    "onboarding-cro": "marketing",
    "occupational-health-analyzer": "health",
    "openapi-spec-generation": "api-integration",
    "oral-health-analyzer": "health",
    "page-cro": "marketing",
    "paid-ads": "marketing",
    "parallel-agents": "ai-agents",
    "payment-integration": "api-integration",
    "paywall-upgrade-cro": "marketing",
    "popup-cro": "marketing",
    "privilege-escalation-methods": "security",
    "production-scheduling": "business",
    "professional-proofreader": "content",
    "progressive-web-app": "front-end",
    "projection-patterns": "architecture",
    "protocol-reverse-engineering": "security",
    "pydantic-models-py": "development",
    "pypict-skill": "testing",
    "qiskit": "science",
    "quality-nonconformance": "business",
    "readme": "content",
    "red-team-tactics": "security",
    "reference-builder": "content",
    "referral-program": "marketing",
    "rehabilitation-analyzer": "health",
    "render-automation": "automation",
    "returns-reverse-logistics": "business",
    "reverse-engineer": "security",
    "rust-async-patterns": "development",
    "saas-mvp-launcher": "business",
    "sast-configuration": "security",
    "scanpy": "science",
    "schema-markup": "marketing",
    "scientific-writing": "content",
    "screen-reader-testing": "testing",
    "screenshots": "marketing",
    "scroll-experience": "front-end",
    "search-specialist": "content",
    "seaborn": "science",
    "secrets-management": "security",
    "shodan-reconnaissance": "security",
    "signup-flow-cro": "marketing",
    "similarity-search-patterns": "data-ai",
    "skin-health-analyzer": "health",
    "sleep-analyzer": "health",
    "spec-to-code-compliance": "code-quality",
    "sql-injection-testing": "security",
    "ssh-penetration-testing": "security",
    "systems-programming-rust-project": "development",
    "tcm-constitution-analyzer": "health",
    "team-composition-analysis": "business",
    "travel-health-analyzer": "health",
    "vibe-code-auditor": "code-quality",
    "vibers-code-review": "code-quality",
    "voice-ai-development": "voice-agents",
    "weightloss-analyzer": "health",
    "windows-privilege-escalation": "security",
    "wordpress-penetration-testing": "security",
    "xss-html-injection": "security",
    "backtesting-frameworks": "business",
    "bamboohr-automation": "business",
    "beautiful-prose": "content",
    "clarity-gate": "data-ai",
    "codex-review": "code-quality",
    "customer-support": "business",
    "debugger": "development-and-testing",
    "devcontainer-setup": "development",
    "diary": "meta",
    "dwarf-expert": "development",
    "firecrawl-scraper": "data",
    "godot-4-migration": "game-development",
    "grpc-golang": "development",
    "istio-traffic-management": "cloud",
    "julia-pro": "code",
    "kotlin-coroutines-expert": "development",
    "matplotlib": "science",
    "mermaid-expert": "content",
    "minecraft-bukkit-pro": "game-development",
    "moodle-external-api-development": "api-integration",
    "nanobanana-ppt-skills": "presentation-processing",
    "notebooklm": "data-ai",
    "prompt-library": "content",
    "quant-analyst": "business",
    "remotion": "media",
    "server-management": "reliability",
    "sexual-health-analyzer": "health",
    "shellcheck-configuration": "code-quality",
    "slack-bot-builder": "api-integration",
    "software-architecture": "architecture",
    "spark-optimization": "data",
    "statsmodels": "science",
    "stability-ai": "media",
    "sympy": "science",
    "task-intelligence": "workflow",
    "tavily-web": "data-ai",
    "theme-factory": "design",
    "turborepo-caching": "development",
    "tutorial-engineer": "content",
    "typescript-advanced-types": "code",
    "unity-ecs-patterns": "game-development",
    "unsplash-integration": "api-integration",
    "upgrading-expo": "mobile",
    "upstash-qstash": "workflow",
    "vector-index-tuning": "data-ai",
    "verification-before-completion": "workflow",
    "viral-generator-builder": "marketing",
    "vizcom": "design",
    "wcag-audit-patterns": "design",
    "web-performance-optimization": "front-end",
    "wireshark-analysis": "security",
    "x-article-publisher-skill": "marketing",
    "zeroize-audit": "security",
    "zustand-store-ts": "frontend",
}

# Frontmatter fence as understood by parse_frontmatter(): must start the file,
# tolerates an empty body, and requires the closing '---' to end its line.
_FRONTMATTER_RE = re.compile(r'^---\s*\n(.*?)\n?---(?:\s*\n|$)', re.DOTALL)
# Looser historical pattern, kept only as a fallback when stripping the
# frontmatter from the body (closing '---' may be followed by anything).
_LEGACY_FRONTMATTER_RE = re.compile(r'^---\s*\n(.*?)\n---', re.DOTALL)
# Match "key: value" (handles keys with dashes like 'package-name')
_KEY_VALUE_RE = re.compile(r'^(\s*[\w-]+):\s*(.*)$')


def tokenize(text):
    """Return the runs of ``[a-z0-9]`` found in the lower-cased *text*, in order."""
    return re.findall(r"[a-z0-9]+", text.lower())


def infer_category(skill_id, skill_name, description):
    """Guess a category for a skill, or return None when the evidence is weak.

    The first FAMILY_CATEGORY_RULES prefix that *skill_id* starts with decides
    immediately. Otherwise every CATEGORY_RULES entry is scored against the
    combined id/name/description text:

    * multi-word keyword found as a substring: 3 points (4 if strong)
    * single-word keyword present as a whole token: 2 points (3 if strong)
    * single-word keyword present only as a substring: 1 point

    The best-scoring category is returned unless its score is below 4, or it
    is below 8 and leads the runner-up by less than 2. Score ties are broken
    alphabetically by category name.

    Non-string *skill_name* / *description* values are treated as empty.
    """
    for prefix, category in FAMILY_CATEGORY_RULES:
        if skill_id.startswith(prefix):
            return category

    normalized_name = skill_name if isinstance(skill_name, str) else ""
    normalized_description = description if isinstance(description, str) else ""
    combined_text = f"{skill_id} {normalized_name} {normalized_description}".lower()
    token_set = set(tokenize(combined_text))

    scores = {}
    for rule in CATEGORY_RULES:
        strong_keywords = {keyword.lower() for keyword in rule.get("strong_keywords", [])}
        score = 0
        for keyword in rule["keywords"]:
            keyword_lower = keyword.lower()
            is_strong = keyword_lower in strong_keywords
            if " " in keyword_lower:
                # Phrases can never be a single token; only substring-match them.
                if keyword_lower in combined_text:
                    score += 4 if is_strong else 3
                continue
            if keyword_lower in token_set:
                score += 3 if is_strong else 2
            elif keyword_lower in combined_text:
                score += 1
        if score > 0:
            scores[rule["name"]] = score

    if not scores:
        return None

    ranked = sorted(scores.items(), key=lambda item: (-item[1], item[0]))
    best_category, best_score = ranked[0]
    second_score = ranked[1][1] if len(ranked) > 1 else 0

    if best_score < 4:
        return None
    if best_score < 8 and (best_score - second_score) < 2:
        return None
    return best_category


def normalize_category(category):
    """Strip and lower-case a string category; return non-strings unchanged."""
    if not isinstance(category, str):
        return category
    return category.strip().lower()


def normalize_yaml_value(value):
    """Recursively convert parsed YAML into plain values.

    Mappings become dicts and lists are rebuilt with normalized items,
    date/datetime objects become ISO-8601 strings, and bytes/bytearray are
    decoded as UTF-8 with undecodable bytes replaced. Anything else is
    returned as-is.
    """
    if isinstance(value, Mapping):
        return {key: normalize_yaml_value(val) for key, val in value.items()}
    if isinstance(value, list):
        return [normalize_yaml_value(item) for item in value]
    if isinstance(value, (date, datetime)):
        return value.isoformat()
    if isinstance(value, (bytes, bytearray)):
        return bytes(value).decode("utf-8", errors="replace")
    return value


def coerce_metadata_text(value):
    """Return *value* as text, or None when it is missing or a container.

    None, mappings, lists, tuples and sets yield None; strings pass through;
    every other value is converted with ``str()``.
    """
    if value is None or isinstance(value, (Mapping, list, tuple, set)):
        return None
    if isinstance(value, str):
        return value
    return str(value)


def parse_frontmatter(content):
    """
    Parses YAML frontmatter, sanitizing unquoted values containing @.
    Handles single values and comma-separated lists by quoting the entire line.

    Returns the frontmatter as a dict (values passed through
    normalize_yaml_value). Returns an empty dict when there is no frontmatter
    at the start of *content*, when the YAML fails to parse, or when it does
    not describe a mapping; the latter two cases print a warning.
    """
    fm_match = _FRONTMATTER_RE.search(content)
    if not fm_match:
        return {}

    # Process line by line to handle values containing @ and commas
    sanitized_lines = []
    for line in fm_match.group(1).splitlines():
        match = _KEY_VALUE_RE.match(line)
        if match:
            key, val = match.groups()
            val_s = val.strip()
            # If value contains @ and isn't already quoted, wrap the whole string in double quotes
            if '@' in val_s and not val_s.startswith(('"', "'")):
                # Inside a double-quoted YAML scalar a backslash starts an
                # escape sequence, so backslashes must be escaped (first)
                # as well as any existing double quotes.
                safe_val = val_s.replace('\\', '\\\\').replace('"', '\\"')
                line = f'{key}: "{safe_val}"'
        sanitized_lines.append(line)

    try:
        parsed = yaml.safe_load('\n'.join(sanitized_lines)) or {}
    except yaml.YAMLError as e:
        print(f"⚠️ YAML parsing error: {e}")
        return {}

    parsed = normalize_yaml_value(parsed)
    if not isinstance(parsed, Mapping):
        print("⚠️ YAML frontmatter must be a mapping/object")
        return {}
    return dict(parsed)


def _fallback_description(content):
    """Return the first non-header paragraph of the body, capped at 250 chars.

    The frontmatter is stripped first, using the same pattern as
    parse_frontmatter() so that an empty frontmatter block is recognised too,
    then the looser legacy pattern. Returns "" when no paragraph is found.
    """
    fm_match = _FRONTMATTER_RE.search(content) or _LEGACY_FRONTMATTER_RE.search(content)
    body = content[fm_match.end():].strip() if fm_match else content

    # Simple extraction of first non-header paragraph
    desc_lines = []
    for line in body.split('\n'):
        if line.startswith('#') or not line.strip():
            if desc_lines:
                break
            continue
        desc_lines.append(line.strip())
    return " ".join(desc_lines)[:250].strip()


def generate_index(skills_dir, output_file, compatibility_report=None):
    """Scan *skills_dir* for SKILL.md files and write the index to *output_file*.

    Args:
        skills_dir: Root directory to walk. Directories whose name starts
            with '.' are skipped, as are symlinked SKILL.md files.
        output_file: Path of the JSON file to write (UTF-8, LF newlines).
        compatibility_report: Optional report passed to
            plugin_compatibility_by_path(); built from *skills_dir* when None.

    Returns:
        The list of skill dicts that was written, sorted by lower-cased name,
        then lower-cased id, then path.
    """
    print(f"🏗️ Generating index from: {skills_dir}")
    skills = []
    if compatibility_report is None:
        compatibility_report = build_plugin_compatibility_report(pathlib.Path(skills_dir))
    compatibility_lookup = plugin_compatibility_by_path(compatibility_report)

    # Skill paths are recorded relative to the parent of skills_dir. normpath
    # drops a trailing separator first; without it dirname() would return
    # skills_dir itself and the leading directory would vanish from every path.
    index_base = os.path.dirname(os.path.normpath(skills_dir))

    for root, dirs, files in os.walk(skills_dir):
        # Skip .disabled or hidden directories
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        if "SKILL.md" not in files:
            continue

        skill_path = os.path.join(root, "SKILL.md")
        if os.path.islink(skill_path):
            print(f"⚠️ Skipping symlinked SKILL.md: {skill_path}")
            continue

        dir_name = os.path.basename(root)
        parent_dir = os.path.basename(os.path.dirname(root))
        rel_path = os.path.relpath(root, index_base)

        # Default values
        skill_info = {
            "id": dir_name,
            # Force forward slashes for cross-platform JSON compatibility
            "path": rel_path.replace(os.sep, '/'),
            # Will be overridden by frontmatter if present
            "category": parent_dir if parent_dir != "skills" else None,
            "name": dir_name.replace("-", " ").title(),
            "description": "",
            "risk": "unknown",
            "source": "unknown",
            "date_added": None,
            "plugin": {
                "targets": {
                    "codex": "supported",
                    "claude": "supported",
                },
                "setup": {
                    "type": "none",
                    "summary": "",
                    "docs": None,
                },
                "reasons": [],
            },
        }

        try:
            with open(skill_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"⚠️ Error reading {skill_path}: {e}")
            continue

        # Parse Metadata
        metadata = parse_frontmatter(content)

        # Merge Metadata (frontmatter takes priority)
        for field in ("name", "description", "risk", "source", "date_added"):
            text = coerce_metadata_text(metadata.get(field))
            if text is not None:
                skill_info[field] = text

        # Category: prefer frontmatter, then folder structure, then conservative inference
        category = coerce_metadata_text(metadata.get("category"))
        if category is not None:
            skill_info["category"] = category
        elif skill_info["category"] is None:
            inferred_category = infer_category(
                skill_info["id"],
                skill_info["name"],
                skill_info["description"],
            )
            skill_info["category"] = inferred_category or "uncategorized"

        # Curated per-skill assignments win over every other source.
        if skill_info["id"] in CURATED_CATEGORY_OVERRIDES:
            skill_info["category"] = CURATED_CATEGORY_OVERRIDES[skill_info["id"]]

        skill_info["category"] = normalize_category(skill_info["category"])

        plugin_info = compatibility_lookup.get(skill_info["path"])
        if plugin_info:
            # Copy so the index entries do not alias the report's containers.
            skill_info["plugin"] = {
                "targets": dict(plugin_info["targets"]),
                "setup": dict(plugin_info["setup"]),
                "reasons": list(plugin_info["reasons"]),
            }

        # Fallback for description if missing in frontmatter (legacy support)
        if not skill_info["description"]:
            fallback = _fallback_description(content)
            if fallback:
                skill_info["description"] = fallback

        skills.append(skill_info)

    # Sort by name, then id; path is the final tiebreaker so the output does
    # not depend on the order in which os.walk visits directories.
    skills.sort(key=lambda x: (x["name"].lower(), x["id"].lower(), x["path"]))

    with open(output_file, 'w', encoding='utf-8', newline='\n') as f:
        json.dump(skills, f, indent=2)

    print(f"✅ Generated rich index with {len(skills)} skills at: {output_file}")
    return skills


if __name__ == "__main__":
    base_dir = str(find_repo_root(__file__))
    skills_path = os.path.join(base_dir, "skills")
    output_path = os.path.join(base_dir, "skills_index.json")
    generate_index(skills_path, output_path)