meta(index): Expand specialist skill categories

This commit is contained in:
sickn33
2026-03-20 09:53:45 +01:00
parent 34776e3eac
commit ea7bcfb70a
4 changed files with 427 additions and 299 deletions

View File

@@ -205,32 +205,143 @@ FAMILY_CATEGORY_RULES = [
("terraform-", "devops"),
]
CATEGORY_ALIASES = {
# Legacy/specialized labels normalized to broader catalog buckets
"ai-agents": "ai-ml",
"voice-agents": "ai-ml",
"data-ai": "ai-ml",
"memory": "ai-ml",
"api-integration": "backend",
"blockchain": "backend",
"front-end": "web-development",
"frontend": "web-development",
"app-builder": "development",
"code": "development",
"code-quality": "development",
"development-and-testing": "development",
"framework": "development",
"database-processing": "database",
"document-processing": "productivity",
"spreadsheet-processing": "productivity",
"presentation-processing": "productivity",
"graphics-processing": "productivity",
"data": "data-science",
"marketing": "business",
"planning": "workflow",
"project-management": "workflow",
"reliability": "devops",
"test-automation": "testing",
# Curated per-skill category assignments, looked up by skill id.
# Skills are listed under the category they are pinned to and flattened into
# a plain ``{skill_id: category}`` mapping; insertion order follows the
# grouping below.
CURATED_CATEGORY_OVERRIDES = {
    skill_id: category
    for category, skill_ids in (
        ("ai-agents", (
            "ai-agents-architect",
            "agent-evaluation",
            "agent-manager-skill",
            "langgraph",
            "multi-agent-patterns",
            "pydantic-ai",
        )),
        ("api-integration", (
            "plaid-fintech",
            "stripe-integration",
            "paypal-integration",
            "hubspot-integration",
            "twilio-communications",
            "pakistan-payments-stack",
        )),
        ("app-builder", (
            "javascript-typescript-typescript-scaffold",
            "fastapi-templates",
            "frontend-mobile-development-component-scaffold",
            "templates",
        )),
        ("blockchain", (
            "blockchain-developer",
            "crypto-bd-agent",
            "defi-protocol-templates",
            "goldrush-api",
            "web3-testing",
        )),
        ("code", (
            "javascript-pro",
            "python-pro",
            "typescript-pro",
            "golang-pro",
            "rust-pro",
        )),
        ("code-quality", (
            "uncle-bob-craft",
            "clean-code",
            "kaizen",
            "code-review-checklist",
            "codebase-cleanup-tech-debt",
            "code-refactoring-refactor-clean",
        )),
        ("data", (
            "data-engineer",
            "dbt-transformation-patterns",
            "analytics-tracking",
            "sql-pro",
            "web-scraper",
            "x-twitter-scraper",
        )),
        ("data-ai", (
            "ai-engineering-toolkit",
            "embedding-strategies",
            "llm-app-patterns",
            "local-llm-expert",
            "rag-engineer",
            "seek-and-analyze-video",
            "vector-database-engineer",
        )),
        ("database-processing", (
            "database-admin",
            "database-architect",
            "database-design",
            "database-optimizer",
            "base",
        )),
        ("development-and-testing", (
            "bug-hunter",
            "debugging-strategies",
            "openclaw-github-repo-commander",
            "systematic-debugging",
            "test-fixing",
        )),
        ("document-processing", (
            "docx-official",
            "doc-coauthoring",
            "pdf",
            "pdf-official",
            "writer",
        )),
        ("front-end", (
            "landing-page-generator",
            "frontend-design",
            "frontend-developer",
            "frontend-dev-guidelines",
            "ui-ux-pro-max",
        )),
        ("frontend", (
            "astro",
            "nextjs-best-practices",
            "react-patterns",
            "sveltekit",
            "tailwind-patterns",
        )),
        ("framework", (
            "django-pro",
            "fastapi-pro",
            "nestjs-expert",
            "nextjs-app-router-patterns",
            "trpc-fullstack",
            "typescript-expert",
        )),
        ("graphics-processing", (
            "algorithmic-art",
            "canvas-design",
            "draw",
            "image-studio",
            "imagen",
        )),
        ("marketing", (
            "app-store-optimization",
            "content-creator",
            "copy-editing",
            "copywriting",
            "email-sequence",
            "launch-strategy",
            "programmatic-seo",
        )),
        ("media", (
            "remotion-best-practices",
            "sora",
            "videodb",
            "videodb-skills",
        )),
        ("memory", (
            "agent-memory-systems",
            "context-window-management",
            "conversation-memory",
            "hierarchical-agent-memory",
            "memory-systems",
            "recallmax",
        )),
        ("planning", (
            "blueprint",
            "concise-planning",
            "planning-with-files",
            "track-management",
        )),
        ("presentation-processing", (
            "google-slides-automation",
            "frontend-slides",
            "impress",
            "pptx-official",
        )),
        ("productivity", (
            "file-organizer",
            "google-calendar-automation",
            "interview-coach",
            "office-productivity",
        )),
        ("project-management", (
            "github-issue-creator",
            "linear-claude-skill",
            "progressive-estimation",
            "team-collaboration-issue",
            "team-collaboration-standup-notes",
        )),
        ("reliability", (
            "distributed-tracing",
            "incident-responder",
            "observability-engineer",
            "postmortem-writing",
            "slo-implementation",
            "tool-use-guardian",
        )),
        ("spreadsheet-processing", (
            "calc",
            "google-sheets-automation",
            "googlesheets-automation",
            "xlsx-official",
        )),
        ("test-automation", (
            "awt-e2e-testing",
            "browser-automation",
            "e2e-testing-patterns",
            "go-playwright",
            "playwright-java",
            "playwright-skill",
            "test-automator",
            "webapp-testing",
        )),
        ("voice-agents", (
            "audio-transcriber",
            "fal-audio",
            "pipecat-friday-agent",
        )),
    )
    for skill_id in skill_ids
}
@@ -286,8 +397,7 @@ def infer_category(skill_id, skill_name, description):
def normalize_category(category):
    """Return a canonical spelling of a category label.

    String labels are stripped of surrounding whitespace and lower-cased.
    Any non-string value (for example ``None``) is returned unchanged so the
    caller can decide how to handle it.

    Specialized labels such as ``"front-end"`` or ``"test-automation"`` are
    deliberately kept as-is rather than folded into a broader bucket;
    remapping them here would undo per-skill category overrides that are
    applied just before this function is called.
    """
    if not isinstance(category, str):
        return category
    # Only cosmetic normalization: no alias lookup, so there is a single
    # reachable return for string input.
    return category.strip().lower()
def normalize_yaml_value(value):
if isinstance(value, Mapping):
@@ -394,6 +504,8 @@ def generate_index(skills_dir, output_file):
skill_info["description"],
)
skill_info["category"] = inferred_category or "uncategorized"
if skill_info["id"] in CURATED_CATEGORY_OVERRIDES:
skill_info["category"] = CURATED_CATEGORY_OVERRIDES[skill_info["id"]]
skill_info["category"] = normalize_category(skill_info["category"])
# Fallback for description if missing in frontmatter (legacy support)

View File

@@ -20,10 +20,10 @@ generate_index = load_module("tools/scripts/generate_index.py", "generate_index_
class GenerateIndexCategoryTests(unittest.TestCase):
def test_normalize_category_maps_legacy_labels(self):
# Expects normalize_category to translate legacy labels into broader buckets
# ("front-end" -> "web-development", "ai-agents" -> "ai-ml", ...).
# NOTE(review): this only holds while normalize_category consults
# CATEGORY_ALIASES. test_normalize_category_preserves_specialized_labels in
# this class expects the same kind of label to come back unmapped, so the two
# tests cannot both pass -- confirm which expectation is current.
self.assertEqual(generate_index.normalize_category("front-end"), "web-development")
self.assertEqual(generate_index.normalize_category("ai-agents"), "ai-ml")
self.assertEqual(generate_index.normalize_category("document-processing"), "productivity")
def test_normalize_category_preserves_specialized_labels(self):
    """Specialized labels come back trimmed and lower-cased, not remapped."""
    expectations = (
        (" Front-End ", "front-end"),
        ("Ai-Agents", "ai-agents"),
        ("Document-Processing", "document-processing"),
    )
    # Checked in order; the first mismatch fails the test.
    for raw_label, expected in expectations:
        self.assertEqual(generate_index.normalize_category(raw_label), expected)
def test_infer_category_returns_none_for_weak_signal(self):
inferred = generate_index.infer_category(
@@ -97,9 +97,9 @@ class GenerateIndexCategoryTests(unittest.TestCase):
self.assertEqual(categories["explicit-skill"], "custom")
self.assertEqual(categories["nested-skill"], "bundles")
self.assertEqual(categories["playwright-skill"], "testing")
self.assertEqual(categories["playwright-skill"], "test-automation")
def test_generate_index_normalizes_explicit_legacy_category(self):
def test_generate_index_preserves_explicit_specialized_category(self):
with tempfile.TemporaryDirectory() as temp_dir:
base = pathlib.Path(temp_dir)
skills_dir = base / "skills"
@@ -113,7 +113,23 @@ class GenerateIndexCategoryTests(unittest.TestCase):
)
skills = generate_index.generate_index(str(skills_dir), str(output_file))
self.assertEqual(skills[0]["category"], "web-development")
self.assertEqual(skills[0]["category"], "front-end")
def test_generate_index_applies_curated_override(self):
    """A curated override replaces the category declared in frontmatter.

    The fixture skill declares ``category: custom`` but its id is
    ``playwright-skill``, and the generated index is expected to report
    ``test-automation`` for it.
    """
    skill_markdown = (
        "---\nname: playwright-skill\ncategory: custom\ndescription: Browser automation\n---\nbody\n"
    )
    with tempfile.TemporaryDirectory() as workspace:
        root = pathlib.Path(workspace)
        skills_root = root / "skills"
        index_path = root / "skills_index.json"

        # Single skill on disk, so it is the only entry in the result.
        skill_dir = skills_root / "playwright-skill"
        skill_dir.mkdir(parents=True)
        (skill_dir / "SKILL.md").write_text(skill_markdown, encoding="utf-8")

        index = generate_index.generate_index(str(skills_root), str(index_path))
        self.assertEqual(index[0]["category"], "test-automation")
if __name__ == "__main__":