meta(index): Expand specialist skill categories

2026-03-20 09:53:45 +01:00
parent 34776e3eac
commit ea7bcfb70a
4 changed files with 427 additions and 299 deletions
--- a/tools/scripts/generate_index.py
+++ b/tools/scripts/generate_index.py
@@ -205,32 +205,143 @@ FAMILY_CATEGORY_RULES = [
    ("terraform-", "devops"),
 ]

-CATEGORY_ALIASES = {
-    # Legacy/specialized labels normalized to broader catalog buckets
-    "ai-agents": "ai-ml",
-    "voice-agents": "ai-ml",
-    "data-ai": "ai-ml",
-    "memory": "ai-ml",
-    "api-integration": "backend",
-    "blockchain": "backend",
-    "front-end": "web-development",
-    "frontend": "web-development",
-    "app-builder": "development",
-    "code": "development",
-    "code-quality": "development",
-    "development-and-testing": "development",
-    "framework": "development",
-    "database-processing": "database",
-    "document-processing": "productivity",
-    "spreadsheet-processing": "productivity",
-    "presentation-processing": "productivity",
-    "graphics-processing": "productivity",
-    "data": "data-science",
-    "marketing": "business",
-    "planning": "workflow",
-    "project-management": "workflow",
-    "reliability": "devops",
-    "test-automation": "testing",
+CURATED_CATEGORY_OVERRIDES = {  # skill id -> category; generate_index assigns it over the frontmatter/inferred category
+    "ai-agents-architect": "ai-agents",
+    "agent-evaluation": "ai-agents",
+    "agent-manager-skill": "ai-agents",
+    "langgraph": "ai-agents",
+    "multi-agent-patterns": "ai-agents",
+    "pydantic-ai": "ai-agents",
+    "plaid-fintech": "api-integration",
+    "stripe-integration": "api-integration",
+    "paypal-integration": "api-integration",
+    "hubspot-integration": "api-integration",
+    "twilio-communications": "api-integration",
+    "pakistan-payments-stack": "api-integration",
+    "javascript-typescript-typescript-scaffold": "app-builder",
+    "fastapi-templates": "app-builder",
+    "frontend-mobile-development-component-scaffold": "app-builder",
+    "templates": "app-builder",
+    "blockchain-developer": "blockchain",
+    "crypto-bd-agent": "blockchain",
+    "defi-protocol-templates": "blockchain",
+    "goldrush-api": "blockchain",
+    "web3-testing": "blockchain",
+    "javascript-pro": "code",
+    "python-pro": "code",
+    "typescript-pro": "code",
+    "golang-pro": "code",
+    "rust-pro": "code",
+    "uncle-bob-craft": "code-quality",
+    "clean-code": "code-quality",
+    "kaizen": "code-quality",
+    "code-review-checklist": "code-quality",
+    "codebase-cleanup-tech-debt": "code-quality",
+    "code-refactoring-refactor-clean": "code-quality",
+    "data-engineer": "data",
+    "dbt-transformation-patterns": "data",
+    "analytics-tracking": "data",
+    "sql-pro": "data",
+    "web-scraper": "data",
+    "x-twitter-scraper": "data",
+    "ai-engineering-toolkit": "data-ai",
+    "embedding-strategies": "data-ai",
+    "llm-app-patterns": "data-ai",
+    "local-llm-expert": "data-ai",
+    "rag-engineer": "data-ai",
+    "seek-and-analyze-video": "data-ai",
+    "vector-database-engineer": "data-ai",
+    "database-admin": "database-processing",
+    "database-architect": "database-processing",
+    "database-design": "database-processing",
+    "database-optimizer": "database-processing",
+    "base": "database-processing",
+    "bug-hunter": "development-and-testing",
+    "debugging-strategies": "development-and-testing",
+    "openclaw-github-repo-commander": "development-and-testing",
+    "systematic-debugging": "development-and-testing",
+    "test-fixing": "development-and-testing",
+    "docx-official": "document-processing",
+    "doc-coauthoring": "document-processing",
+    "pdf": "document-processing",
+    "pdf-official": "document-processing",
+    "writer": "document-processing",
+    "landing-page-generator": "front-end",
+    "frontend-design": "front-end",
+    "frontend-developer": "front-end",
+    "frontend-dev-guidelines": "front-end",
+    "ui-ux-pro-max": "front-end",
+    "astro": "frontend",
+    "nextjs-best-practices": "frontend",
+    "react-patterns": "frontend",
+    "sveltekit": "frontend",
+    "tailwind-patterns": "frontend",
+    "django-pro": "framework",
+    "fastapi-pro": "framework",
+    "nestjs-expert": "framework",
+    "nextjs-app-router-patterns": "framework",
+    "trpc-fullstack": "framework",
+    "typescript-expert": "framework",
+    "algorithmic-art": "graphics-processing",
+    "canvas-design": "graphics-processing",
+    "draw": "graphics-processing",
+    "image-studio": "graphics-processing",
+    "imagen": "graphics-processing",
+    "app-store-optimization": "marketing",
+    "content-creator": "marketing",
+    "copy-editing": "marketing",
+    "copywriting": "marketing",
+    "email-sequence": "marketing",
+    "launch-strategy": "marketing",
+    "programmatic-seo": "marketing",
+    "remotion-best-practices": "media",
+    "sora": "media",
+    "videodb": "media",
+    "videodb-skills": "media",
+    "agent-memory-systems": "memory",
+    "context-window-management": "memory",
+    "conversation-memory": "memory",
+    "hierarchical-agent-memory": "memory",
+    "memory-systems": "memory",
+    "recallmax": "memory",
+    "blueprint": "planning",
+    "concise-planning": "planning",
+    "planning-with-files": "planning",
+    "track-management": "planning",
+    "google-slides-automation": "presentation-processing",
+    "frontend-slides": "presentation-processing",
+    "impress": "presentation-processing",
+    "pptx-official": "presentation-processing",
+    "file-organizer": "productivity",
+    "google-calendar-automation": "productivity",
+    "interview-coach": "productivity",
+    "office-productivity": "productivity",
+    "github-issue-creator": "project-management",
+    "linear-claude-skill": "project-management",
+    "progressive-estimation": "project-management",
+    "team-collaboration-issue": "project-management",
+    "team-collaboration-standup-notes": "project-management",
+    "distributed-tracing": "reliability",
+    "incident-responder": "reliability",
+    "observability-engineer": "reliability",
+    "postmortem-writing": "reliability",
+    "slo-implementation": "reliability",
+    "tool-use-guardian": "reliability",
+    "calc": "spreadsheet-processing",
+    "google-sheets-automation": "spreadsheet-processing",
+    "googlesheets-automation": "spreadsheet-processing",
+    "xlsx-official": "spreadsheet-processing",
+    "awt-e2e-testing": "test-automation",
+    "browser-automation": "test-automation",
+    "e2e-testing-patterns": "test-automation",
+    "go-playwright": "test-automation",
+    "playwright-java": "test-automation",
+    "playwright-skill": "test-automation",
+    "test-automator": "test-automation",
+    "webapp-testing": "test-automation",
+    "audio-transcriber": "voice-agents",
+    "fal-audio": "voice-agents",
+    "pipecat-friday-agent": "voice-agents",
 }


@@ -286,8 +397,7 @@ def infer_category(skill_id, skill_name, description):
 def normalize_category(category):
    if not isinstance(category, str):
        return category
-    normalized = category.strip().lower()
-    return CATEGORY_ALIASES.get(normalized, normalized)
+    return category.strip().lower()  # only trims and lowercases; the label is no longer remapped through an alias table

 def normalize_yaml_value(value):
    if isinstance(value, Mapping):
@@ -394,6 +504,8 @@ def generate_index(skills_dir, output_file):
                    skill_info["description"],
                )
                skill_info["category"] = inferred_category or "uncategorized"
+            if skill_info["id"] in CURATED_CATEGORY_OVERRIDES:
+                skill_info["category"] = CURATED_CATEGORY_OVERRIDES[skill_info["id"]]
            skill_info["category"] = normalize_category(skill_info["category"])
            
            # Fallback for description if missing in frontmatter (legacy support)
--- a/tools/scripts/tests/test_generate_index_categories.py
+++ b/tools/scripts/tests/test_generate_index_categories.py
@@ -20,10 +20,10 @@ generate_index = load_module("tools/scripts/generate_index.py", "generate_index_


 class GenerateIndexCategoryTests(unittest.TestCase):
-    def test_normalize_category_maps_legacy_labels(self):
-        self.assertEqual(generate_index.normalize_category("front-end"), "web-development")
-        self.assertEqual(generate_index.normalize_category("ai-agents"), "ai-ml")
-        self.assertEqual(generate_index.normalize_category("document-processing"), "productivity")
+    def test_normalize_category_preserves_specialized_labels(self):
+        normalize = generate_index.normalize_category  # labels must come back trimmed and lowercased, not remapped
+        raw_labels = (" Front-End ", "Ai-Agents", "Document-Processing")
+        self.assertEqual([normalize(raw) for raw in raw_labels], ["front-end", "ai-agents", "document-processing"])

    def test_infer_category_returns_none_for_weak_signal(self):
        inferred = generate_index.infer_category(
@@ -97,9 +97,9 @@ class GenerateIndexCategoryTests(unittest.TestCase):

            self.assertEqual(categories["explicit-skill"], "custom")
            self.assertEqual(categories["nested-skill"], "bundles")
-            self.assertEqual(categories["playwright-skill"], "testing")
+            self.assertEqual(categories["playwright-skill"], "test-automation")

-    def test_generate_index_normalizes_explicit_legacy_category(self):
+    def test_generate_index_preserves_explicit_specialized_category(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            base = pathlib.Path(temp_dir)
            skills_dir = base / "skills"
@@ -113,7 +113,23 @@ class GenerateIndexCategoryTests(unittest.TestCase):
            )

            skills = generate_index.generate_index(str(skills_dir), str(output_file))
-            self.assertEqual(skills[0]["category"], "web-development")
+            self.assertEqual(skills[0]["category"], "front-end")
+
+    def test_generate_index_applies_curated_override(self):
+        frontmatter = "---\nname: playwright-skill\ncategory: custom\ndescription: Browser automation\n---\nbody\n"
+        with tempfile.TemporaryDirectory() as scratch:
+            root = pathlib.Path(scratch)
+            index_path = root / "skills_index.json"
+            catalog_dir = root / "skills"
+
+            # "playwright-skill" is listed in CURATED_CATEGORY_OVERRIDES, so it must beat the explicit "custom".
+            skill_dir = catalog_dir / "playwright-skill"
+            skill_dir.mkdir(parents=True)
+            skill_file = skill_dir / "SKILL.md"
+            skill_file.write_text(frontmatter, encoding="utf-8")
+
+            entries = generate_index.generate_index(str(catalog_dir), str(index_path))
+            self.assertEqual(entries[0]["category"], "test-automation")


 if __name__ == "__main__":