diff --git a/skills_index.json b/skills_index.json index 80b75024..961f32ea 100644 --- a/skills_index.json +++ b/skills_index.json @@ -342,7 +342,7 @@ { "id": "ai-engineering-toolkit", "path": "skills/ai-engineering-toolkit", - "category": "data-ai", + "category": "ai-ml", "name": "ai-engineering-toolkit", "description": "6 production-ready AI engineering workflows: prompt evaluation (8-dimension scoring), context budget planning, RAG pipeline design, agent security audit (65-point checklist), eval harness building, and product sense coaching.", "risk": "offensive", @@ -972,7 +972,7 @@ { "id": "astro", "path": "skills/astro", - "category": "frontend", + "category": "web-development", "name": "astro", "description": "Build content-focused websites with Astro \u2014 zero JS by default, islands architecture, multi-framework components, and Markdown/MDX support.", "risk": "safe", @@ -2452,7 +2452,7 @@ { "id": "base", "path": "skills/libreoffice/base", - "category": "database-processing", + "category": "database", "name": "base", "description": "Database management, forms, reports, and data operations with LibreOffice Base.", "risk": "safe", @@ -2662,7 +2662,7 @@ { "id": "blueprint", "path": "skills/blueprint", - "category": "planning", + "category": "workflow", "name": "blueprint", "description": "Turn a one-line objective into a step-by-step construction plan any coding agent can execute cold. Each step has a self-contained context brief \u2014 a fresh agent in a new session can pick up any step without reading prior steps.", "risk": "safe", @@ -2922,7 +2922,7 @@ { "id": "calc", "path": "skills/libreoffice/calc", - "category": "spreadsheet-processing", + "category": "productivity", "name": "calc", "description": "Spreadsheet creation, format conversion (ODS/XLSX/CSV), formulas, data automation with LibreOffice Calc.", "risk": "safe", @@ -3692,7 +3692,7 @@ { "id": "content-creator", "path": "skills/content-creator", - "category": "marketing", + "category": "business", "name": "content-creator", "description": "Professional-grade brand voice analysis, SEO optimization, and platform-specific content frameworks.", "risk": "unknown", @@ -4692,7 +4692,7 @@ { "id": "draw", "path": "skills/libreoffice/draw", - "category": "graphics-processing", + "category": "productivity", "name": "draw", "description": "Vector graphics and diagram creation, format conversion (ODG/SVG/PDF) with LibreOffice Draw.", "risk": "safe", @@ -6082,7 +6082,7 @@ { "id": "goldrush-api", "path": "skills/goldrush-api", - "category": "blockchain", + "category": "backend", "name": "goldrush-api", "description": "Query blockchain data across 100+ chains: wallet balances, token prices, transactions, DEX pairs, and real-time OHLCV streams via the GoldRush API by Covalent.", "risk": "safe", @@ -6642,7 +6642,7 @@ { "id": "impress", "path": "skills/libreoffice/impress", - "category": "presentation-processing", + "category": "productivity", "name": "impress", "description": "Presentation creation, format conversion (ODP/PPTX/PDF), slide automation with LibreOffice Impress.", "risk": "safe", @@ -7012,7 +7012,7 @@ { "id": "landing-page-generator", "path": "skills/landing-page-generator", - "category": "front-end", + "category": "web-development", "name": "landing-page-generator", "description": "Generates high-converting Next.js/React landing pages with Tailwind CSS. Uses PAS, AIDA, and BAB frameworks for optimized copy/components (Heroes, Features, Pricing). Focuses on Core Web Vitals/SEO.", "risk": "safe", @@ -7402,7 +7402,7 @@ { "id": "local-llm-expert", "path": "skills/local-llm-expert", - "category": "data-ai", + "category": "ai-ml", "name": "local-llm-expert", "description": "Master local LLM inference, model selection, VRAM optimization, and local deployment using Ollama, llama.cpp, vLLM, and LM Studio. Expert in quantization formats (GGUF, EXL2) and local AI privacy.", "risk": "unknown", @@ -8202,7 +8202,7 @@ { "id": "nestjs-expert", "path": "skills/nestjs-expert", - "category": "framework", + "category": "development", "name": "nestjs-expert", "description": "You are an expert in Nest.js with deep knowledge of enterprise-grade Node.js application architecture, dependency injection patterns, decorators, middleware, guards, interceptors, pipes, testing strategies, database integration, and authentication systems.", "risk": "unknown", @@ -8712,7 +8712,7 @@ { "id": "openclaw-github-repo-commander", "path": "skills/openclaw-github-repo-commander", - "category": "development-and-testing", + "category": "development", "name": "openclaw-github-repo-commander", "description": "7-stage super workflow for GitHub repo audit, cleanup, PR review, and competitor analysis", "risk": "safe", @@ -8802,7 +8802,7 @@ { "id": "pakistan-payments-stack", "path": "skills/pakistan-payments-stack", - "category": "api-integration", + "category": "backend", "name": "pakistan-payments-stack", "description": "Design and implement production-grade Pakistani payment integrations (JazzCash, Easypaisa, bank/PSP rails, optional Raast) for SaaS with PKR billing, webhook reliability, and reconciliation.", "risk": "safe", @@ -8972,7 +8972,7 @@ { "id": "pipecat-friday-agent", "path": "skills/pipecat-friday-agent", - "category": "voice-agents", + "category": "ai-ml", "name": "pipecat-friday-agent", "description": "Build a low-latency, Iron Man-inspired tactical voice assistant (F.R.I.D.A.Y.) using Pipecat, Gemini, and OpenAI.", "risk": "safe", @@ -9022,7 +9022,7 @@ { "id": "playwright-java", "path": "skills/playwright-java", - "category": "test-automation", + "category": "testing", "name": "playwright-java", "description": "Scaffold, write, debug, and enhance enterprise-grade Playwright E2E tests in Java using Page Object Model, JUnit 5, Allure reporting, and parallel execution.", "risk": "safe", @@ -9302,7 +9302,7 @@ { "id": "progressive-estimation", "path": "skills/progressive-estimation", - "category": "project-management", + "category": "workflow", "name": "progressive-estimation", "description": "Estimate AI-assisted and hybrid human+agent development work with research-backed PERT statistics and calibration feedback loops", "risk": "safe", @@ -9422,7 +9422,7 @@ { "id": "pydantic-ai", "path": "skills/pydantic-ai", - "category": "ai-agents", + "category": "ai-ml", "name": "pydantic-ai", "description": "Build production-ready AI agents with PydanticAI \u2014 type-safe tool use, structured outputs, dependency injection, and multi-model support.", "risk": "safe", @@ -9682,7 +9682,7 @@ { "id": "recallmax", "path": "skills/recallmax", - "category": "memory", + "category": "ai-ml", "name": "recallmax", "description": "FREE \u2014 God-tier long-context memory for AI agents. Injects 500K-1M clean tokens, auto-summarizes with tone/intent preservation, compresses 14-turn history into 800 tokens.", "risk": "safe", @@ -9982,7 +9982,7 @@ { "id": "sankhya-dashboard-html-jsp-custom-best-pratices", "path": "skills/sankhya-dashboard-html-jsp-custom-best-pratices", - "category": "code", + "category": "development", "name": "sankhya-dashboard-html-jsp-custom-best-pratices", "description": "This skill should be used when the user asks for patterns, best practices, creation, or fixing of Sankhya dashboards using HTML, JSP, Java, and SQL.", "risk": "safe", @@ -10202,7 +10202,7 @@ { "id": "seek-and-analyze-video", "path": "skills/seek-and-analyze-video", - "category": "data-ai", + "category": "ai-ml", "name": "seek-and-analyze-video", "description": "Seek and analyze video content using Memories.ai Large Visual Memory Model for persistent video intelligence", "risk": "safe", @@ -11112,7 +11112,7 @@ { "id": "sveltekit", "path": "skills/sveltekit", - "category": "frontend", + "category": "web-development", "name": "sveltekit", "description": "Build full-stack web applications with SvelteKit \u2014 file-based routing, SSR, SSG, API routes, and form actions in one framework.", "risk": "safe", @@ -11352,7 +11352,7 @@ { "id": "templates", "path": "skills/app-builder/templates", - "category": "app-builder", + "category": "development", "name": "templates", "description": "Project scaffolding templates for new applications. Use when creating new projects from scratch. Contains 12 templates for various tech stacks.", "risk": "unknown", @@ -11662,7 +11662,7 @@ { "id": "tool-use-guardian", "path": "skills/tool-use-guardian", - "category": "reliability", + "category": "devops", "name": "tool-use-guardian", "description": "FREE \u2014 Intelligent tool-call reliability wrapper. Monitors, retries, fixes, and learns from tool failures. Auto-recovers from truncated JSON, timeouts, rate limits, and mid-chain failures.", "risk": "safe", @@ -11722,7 +11722,7 @@ { "id": "trpc-fullstack", "path": "skills/trpc-fullstack", - "category": "framework", + "category": "development", "name": "trpc-fullstack", "description": "Build end-to-end type-safe APIs with tRPC \u2014 routers, procedures, middleware, subscriptions, and Next.js/React integration patterns.", "risk": "none", @@ -11782,7 +11782,7 @@ { "id": "typescript-expert", "path": "skills/typescript-expert", - "category": "framework", + "category": "development", "name": "typescript-expert", "description": "TypeScript and JavaScript expert with deep knowledge of type-level programming, performance optimization, monorepo management, migration strategies, and modern tooling.", "risk": "unknown", @@ -11842,7 +11842,7 @@ { "id": "uncle-bob-craft", "path": "skills/uncle-bob-craft", - "category": "code-quality", + "category": "development", "name": "uncle-bob-craft", "description": "Use when performing code review, writing or refactoring code, or discussing architecture; complements clean-code and does not replace project linter/formatter.", "risk": "safe", @@ -12522,7 +12522,7 @@ { "id": "writer", "path": "skills/libreoffice/writer", - "category": "document-processing", + "category": "productivity", "name": "writer", "description": "Document creation, format conversion (ODT/DOCX/PDF), mail merge, and automation with LibreOffice Writer.", "risk": "safe", @@ -12562,7 +12562,7 @@ { "id": "x-twitter-scraper", "path": "skills/x-twitter-scraper", - "category": "data", + "category": "data-science", "name": "x-twitter-scraper", "description": "X (Twitter) data platform skill \u2014 tweet search, user lookup, follower extraction, engagement metrics, giveaway draws, monitoring, webhooks, 19 extraction tools, MCP server.", "risk": "safe", diff --git a/tools/scripts/generate_index.py b/tools/scripts/generate_index.py index 947963ce..683d32bc 100644 --- a/tools/scripts/generate_index.py +++ b/tools/scripts/generate_index.py @@ -205,6 +205,34 @@ FAMILY_CATEGORY_RULES = [ ("terraform-", "devops"), ] +CATEGORY_ALIASES = { + # Legacy/specialized labels normalized to broader catalog buckets + "ai-agents": "ai-ml", + "voice-agents": "ai-ml", + "data-ai": "ai-ml", + "memory": "ai-ml", + "api-integration": "backend", + "blockchain": "backend", + "front-end": "web-development", + "frontend": "web-development", + "app-builder": "development", + "code": "development", + "code-quality": "development", + "development-and-testing": "development", + "framework": "development", + "database-processing": "database", + "document-processing": "productivity", + "spreadsheet-processing": "productivity", + "presentation-processing": "productivity", + "graphics-processing": "productivity", + "data": "data-science", + "marketing": "business", + "planning": "workflow", + "project-management": "workflow", + "reliability": "devops", + "test-automation": "testing", +} + def tokenize(text): return re.findall(r"[a-z0-9]+", text.lower()) @@ -254,6 +282,13 @@ def infer_category(skill_id, skill_name, description): return best_category + +def normalize_category(category): + if not isinstance(category, str): + return category + normalized = category.strip().lower() + return CATEGORY_ALIASES.get(normalized, normalized) + def normalize_yaml_value(value): if isinstance(value, Mapping): return {key: normalize_yaml_value(val) for key, val in value.items()} @@ -359,6 +394,7 @@ def generate_index(skills_dir, output_file): skill_info["description"], ) skill_info["category"] = inferred_category or "uncategorized" + skill_info["category"] = normalize_category(skill_info["category"]) # Fallback for description if missing in frontmatter (legacy support) if not skill_info["description"]: diff --git a/tools/scripts/tests/test_generate_index_categories.py b/tools/scripts/tests/test_generate_index_categories.py index bf687424..53b05f0f 100644 --- a/tools/scripts/tests/test_generate_index_categories.py +++ b/tools/scripts/tests/test_generate_index_categories.py @@ -20,6 +20,11 @@ generate_index = load_module("tools/scripts/generate_index.py", "generate_index_ class GenerateIndexCategoryTests(unittest.TestCase): + def test_normalize_category_maps_legacy_labels(self): + self.assertEqual(generate_index.normalize_category("front-end"), "web-development") + self.assertEqual(generate_index.normalize_category("ai-agents"), "ai-ml") + self.assertEqual(generate_index.normalize_category("document-processing"), "productivity") + def test_infer_category_returns_none_for_weak_signal(self): inferred = generate_index.infer_category( "mystery-skill", @@ -94,6 +99,22 @@ class GenerateIndexCategoryTests(unittest.TestCase): self.assertEqual(categories["nested-skill"], "bundles") self.assertEqual(categories["playwright-skill"], "testing") + def test_generate_index_normalizes_explicit_legacy_category(self): + with tempfile.TemporaryDirectory() as temp_dir: + base = pathlib.Path(temp_dir) + skills_dir = base / "skills" + output_file = base / "skills_index.json" + + legacy_dir = skills_dir / "legacy-skill" + legacy_dir.mkdir(parents=True) + (legacy_dir / "SKILL.md").write_text( + "---\nname: legacy-skill\ncategory: front-end\ndescription: Example\n---\nbody\n", + encoding="utf-8", + ) + + skills = generate_index.generate_index(str(skills_dir), str(output_file)) + self.assertEqual(skills[0]["category"], "web-development") + if __name__ == "__main__": unittest.main()