meta(index): Normalize legacy catalog categories

This commit is contained in:
sickn33
2026-03-20 09:39:25 +01:00
parent 515423b80d
commit b5405ea324
3 changed files with 86 additions and 29 deletions

View File

@@ -342,7 +342,7 @@
{
"id": "ai-engineering-toolkit",
"path": "skills/ai-engineering-toolkit",
"category": "data-ai",
"category": "ai-ml",
"name": "ai-engineering-toolkit",
"description": "6 production-ready AI engineering workflows: prompt evaluation (8-dimension scoring), context budget planning, RAG pipeline design, agent security audit (65-point checklist), eval harness building, and product sense coaching.",
"risk": "offensive",
@@ -972,7 +972,7 @@
{
"id": "astro",
"path": "skills/astro",
"category": "frontend",
"category": "web-development",
"name": "astro",
"description": "Build content-focused websites with Astro \u2014 zero JS by default, islands architecture, multi-framework components, and Markdown/MDX support.",
"risk": "safe",
@@ -2452,7 +2452,7 @@
{
"id": "base",
"path": "skills/libreoffice/base",
"category": "database-processing",
"category": "database",
"name": "base",
"description": "Database management, forms, reports, and data operations with LibreOffice Base.",
"risk": "safe",
@@ -2662,7 +2662,7 @@
{
"id": "blueprint",
"path": "skills/blueprint",
"category": "planning",
"category": "workflow",
"name": "blueprint",
"description": "Turn a one-line objective into a step-by-step construction plan any coding agent can execute cold. Each step has a self-contained context brief \u2014 a fresh agent in a new session can pick up any step without reading prior steps.",
"risk": "safe",
@@ -2922,7 +2922,7 @@
{
"id": "calc",
"path": "skills/libreoffice/calc",
"category": "spreadsheet-processing",
"category": "productivity",
"name": "calc",
"description": "Spreadsheet creation, format conversion (ODS/XLSX/CSV), formulas, data automation with LibreOffice Calc.",
"risk": "safe",
@@ -3692,7 +3692,7 @@
{
"id": "content-creator",
"path": "skills/content-creator",
"category": "marketing",
"category": "business",
"name": "content-creator",
"description": "Professional-grade brand voice analysis, SEO optimization, and platform-specific content frameworks.",
"risk": "unknown",
@@ -4692,7 +4692,7 @@
{
"id": "draw",
"path": "skills/libreoffice/draw",
"category": "graphics-processing",
"category": "productivity",
"name": "draw",
"description": "Vector graphics and diagram creation, format conversion (ODG/SVG/PDF) with LibreOffice Draw.",
"risk": "safe",
@@ -6082,7 +6082,7 @@
{
"id": "goldrush-api",
"path": "skills/goldrush-api",
"category": "blockchain",
"category": "backend",
"name": "goldrush-api",
"description": "Query blockchain data across 100+ chains: wallet balances, token prices, transactions, DEX pairs, and real-time OHLCV streams via the GoldRush API by Covalent.",
"risk": "safe",
@@ -6642,7 +6642,7 @@
{
"id": "impress",
"path": "skills/libreoffice/impress",
"category": "presentation-processing",
"category": "productivity",
"name": "impress",
"description": "Presentation creation, format conversion (ODP/PPTX/PDF), slide automation with LibreOffice Impress.",
"risk": "safe",
@@ -7012,7 +7012,7 @@
{
"id": "landing-page-generator",
"path": "skills/landing-page-generator",
"category": "front-end",
"category": "web-development",
"name": "landing-page-generator",
"description": "Generates high-converting Next.js/React landing pages with Tailwind CSS. Uses PAS, AIDA, and BAB frameworks for optimized copy/components (Heroes, Features, Pricing). Focuses on Core Web Vitals/SEO.",
"risk": "safe",
@@ -7402,7 +7402,7 @@
{
"id": "local-llm-expert",
"path": "skills/local-llm-expert",
"category": "data-ai",
"category": "ai-ml",
"name": "local-llm-expert",
"description": "Master local LLM inference, model selection, VRAM optimization, and local deployment using Ollama, llama.cpp, vLLM, and LM Studio. Expert in quantization formats (GGUF, EXL2) and local AI privacy.",
"risk": "unknown",
@@ -8202,7 +8202,7 @@
{
"id": "nestjs-expert",
"path": "skills/nestjs-expert",
"category": "framework",
"category": "development",
"name": "nestjs-expert",
"description": "You are an expert in Nest.js with deep knowledge of enterprise-grade Node.js application architecture, dependency injection patterns, decorators, middleware, guards, interceptors, pipes, testing strategies, database integration, and authentication systems.",
"risk": "unknown",
@@ -8712,7 +8712,7 @@
{
"id": "openclaw-github-repo-commander",
"path": "skills/openclaw-github-repo-commander",
"category": "development-and-testing",
"category": "development",
"name": "openclaw-github-repo-commander",
"description": "7-stage super workflow for GitHub repo audit, cleanup, PR review, and competitor analysis",
"risk": "safe",
@@ -8802,7 +8802,7 @@
{
"id": "pakistan-payments-stack",
"path": "skills/pakistan-payments-stack",
"category": "api-integration",
"category": "backend",
"name": "pakistan-payments-stack",
"description": "Design and implement production-grade Pakistani payment integrations (JazzCash, Easypaisa, bank/PSP rails, optional Raast) for SaaS with PKR billing, webhook reliability, and reconciliation.",
"risk": "safe",
@@ -8972,7 +8972,7 @@
{
"id": "pipecat-friday-agent",
"path": "skills/pipecat-friday-agent",
"category": "voice-agents",
"category": "ai-ml",
"name": "pipecat-friday-agent",
"description": "Build a low-latency, Iron Man-inspired tactical voice assistant (F.R.I.D.A.Y.) using Pipecat, Gemini, and OpenAI.",
"risk": "safe",
@@ -9022,7 +9022,7 @@
{
"id": "playwright-java",
"path": "skills/playwright-java",
"category": "test-automation",
"category": "testing",
"name": "playwright-java",
"description": "Scaffold, write, debug, and enhance enterprise-grade Playwright E2E tests in Java using Page Object Model, JUnit 5, Allure reporting, and parallel execution.",
"risk": "safe",
@@ -9302,7 +9302,7 @@
{
"id": "progressive-estimation",
"path": "skills/progressive-estimation",
"category": "project-management",
"category": "workflow",
"name": "progressive-estimation",
"description": "Estimate AI-assisted and hybrid human+agent development work with research-backed PERT statistics and calibration feedback loops",
"risk": "safe",
@@ -9422,7 +9422,7 @@
{
"id": "pydantic-ai",
"path": "skills/pydantic-ai",
"category": "ai-agents",
"category": "ai-ml",
"name": "pydantic-ai",
"description": "Build production-ready AI agents with PydanticAI \u2014 type-safe tool use, structured outputs, dependency injection, and multi-model support.",
"risk": "safe",
@@ -9682,7 +9682,7 @@
{
"id": "recallmax",
"path": "skills/recallmax",
"category": "memory",
"category": "ai-ml",
"name": "recallmax",
"description": "FREE \u2014 God-tier long-context memory for AI agents. Injects 500K-1M clean tokens, auto-summarizes with tone/intent preservation, compresses 14-turn history into 800 tokens.",
"risk": "safe",
@@ -9982,7 +9982,7 @@
{
"id": "sankhya-dashboard-html-jsp-custom-best-pratices",
"path": "skills/sankhya-dashboard-html-jsp-custom-best-pratices",
"category": "code",
"category": "development",
"name": "sankhya-dashboard-html-jsp-custom-best-pratices",
"description": "This skill should be used when the user asks for patterns, best practices, creation, or fixing of Sankhya dashboards using HTML, JSP, Java, and SQL.",
"risk": "safe",
@@ -10202,7 +10202,7 @@
{
"id": "seek-and-analyze-video",
"path": "skills/seek-and-analyze-video",
"category": "data-ai",
"category": "ai-ml",
"name": "seek-and-analyze-video",
"description": "Seek and analyze video content using Memories.ai Large Visual Memory Model for persistent video intelligence",
"risk": "safe",
@@ -11112,7 +11112,7 @@
{
"id": "sveltekit",
"path": "skills/sveltekit",
"category": "frontend",
"category": "web-development",
"name": "sveltekit",
"description": "Build full-stack web applications with SvelteKit \u2014 file-based routing, SSR, SSG, API routes, and form actions in one framework.",
"risk": "safe",
@@ -11352,7 +11352,7 @@
{
"id": "templates",
"path": "skills/app-builder/templates",
"category": "app-builder",
"category": "development",
"name": "templates",
"description": "Project scaffolding templates for new applications. Use when creating new projects from scratch. Contains 12 templates for various tech stacks.",
"risk": "unknown",
@@ -11662,7 +11662,7 @@
{
"id": "tool-use-guardian",
"path": "skills/tool-use-guardian",
"category": "reliability",
"category": "devops",
"name": "tool-use-guardian",
"description": "FREE \u2014 Intelligent tool-call reliability wrapper. Monitors, retries, fixes, and learns from tool failures. Auto-recovers from truncated JSON, timeouts, rate limits, and mid-chain failures.",
"risk": "safe",
@@ -11722,7 +11722,7 @@
{
"id": "trpc-fullstack",
"path": "skills/trpc-fullstack",
"category": "framework",
"category": "development",
"name": "trpc-fullstack",
"description": "Build end-to-end type-safe APIs with tRPC \u2014 routers, procedures, middleware, subscriptions, and Next.js/React integration patterns.",
"risk": "none",
@@ -11782,7 +11782,7 @@
{
"id": "typescript-expert",
"path": "skills/typescript-expert",
"category": "framework",
"category": "development",
"name": "typescript-expert",
"description": "TypeScript and JavaScript expert with deep knowledge of type-level programming, performance optimization, monorepo management, migration strategies, and modern tooling.",
"risk": "unknown",
@@ -11842,7 +11842,7 @@
{
"id": "uncle-bob-craft",
"path": "skills/uncle-bob-craft",
"category": "code-quality",
"category": "development",
"name": "uncle-bob-craft",
"description": "Use when performing code review, writing or refactoring code, or discussing architecture; complements clean-code and does not replace project linter/formatter.",
"risk": "safe",
@@ -12522,7 +12522,7 @@
{
"id": "writer",
"path": "skills/libreoffice/writer",
"category": "document-processing",
"category": "productivity",
"name": "writer",
"description": "Document creation, format conversion (ODT/DOCX/PDF), mail merge, and automation with LibreOffice Writer.",
"risk": "safe",
@@ -12562,7 +12562,7 @@
{
"id": "x-twitter-scraper",
"path": "skills/x-twitter-scraper",
"category": "data",
"category": "data-science",
"name": "x-twitter-scraper",
"description": "X (Twitter) data platform skill \u2014 tweet search, user lookup, follower extraction, engagement metrics, giveaway draws, monitoring, webhooks, 19 extraction tools, MCP server.",
"risk": "safe",

View File

@@ -205,6 +205,34 @@ FAMILY_CATEGORY_RULES = [
("terraform-", "devops"),
]
# Legacy/specialized labels normalized to broader catalog buckets.
# Each row pairs a target bucket with the legacy labels folded into it; the
# comprehension flattens the rows into a legacy-label -> bucket lookup.
CATEGORY_ALIASES = {
    legacy_label: bucket
    for bucket, legacy_labels in (
        ("ai-ml", ("ai-agents", "voice-agents", "data-ai", "memory")),
        ("backend", ("api-integration", "blockchain")),
        ("web-development", ("front-end", "frontend")),
        (
            "development",
            (
                "app-builder",
                "code",
                "code-quality",
                "development-and-testing",
                "framework",
            ),
        ),
        ("database", ("database-processing",)),
        (
            "productivity",
            (
                "document-processing",
                "spreadsheet-processing",
                "presentation-processing",
                "graphics-processing",
            ),
        ),
        ("data-science", ("data",)),
        ("business", ("marketing",)),
        ("workflow", ("planning", "project-management")),
        ("devops", ("reliability",)),
        ("testing", ("test-automation",)),
    )
    for legacy_label in legacy_labels
}
def tokenize(text):
    """Return the lowercase ASCII alphanumeric runs found in ``text``."""
    lowered = text.lower()
    return re.findall(r"[a-z0-9]+", lowered)
@@ -254,6 +282,13 @@ def infer_category(skill_id, skill_name, description):
return best_category
def normalize_category(category):
    """Map a legacy category label onto its canonical catalog bucket.

    String input is stripped and lower-cased before the lookup in
    ``CATEGORY_ALIASES``; labels without an alias come back in that cleaned
    form. Non-string values are returned untouched.
    """
    if isinstance(category, str):
        cleaned = category.strip().lower()
        return CATEGORY_ALIASES.get(cleaned, cleaned)
    return category
def normalize_yaml_value(value):
if isinstance(value, Mapping):
return {key: normalize_yaml_value(val) for key, val in value.items()}
@@ -359,6 +394,7 @@ def generate_index(skills_dir, output_file):
skill_info["description"],
)
skill_info["category"] = inferred_category or "uncategorized"
skill_info["category"] = normalize_category(skill_info["category"])
# Fallback for description if missing in frontmatter (legacy support)
if not skill_info["description"]:

View File

@@ -20,6 +20,11 @@ generate_index = load_module("tools/scripts/generate_index.py", "generate_index_
class GenerateIndexCategoryTests(unittest.TestCase):
def test_normalize_category_maps_legacy_labels(self):
    """Legacy labels resolve to their broader catalog buckets."""
    expectations = (
        ("front-end", "web-development"),
        ("ai-agents", "ai-ml"),
        ("document-processing", "productivity"),
    )
    for legacy_label, bucket in expectations:
        self.assertEqual(generate_index.normalize_category(legacy_label), bucket)
def test_infer_category_returns_none_for_weak_signal(self):
inferred = generate_index.infer_category(
"mystery-skill",
@@ -94,6 +99,22 @@ class GenerateIndexCategoryTests(unittest.TestCase):
self.assertEqual(categories["nested-skill"], "bundles")
self.assertEqual(categories["playwright-skill"], "testing")
def test_generate_index_normalizes_explicit_legacy_category(self):
    """An explicit legacy category in frontmatter is normalized in the index."""
    # Frontmatter declares the legacy "front-end" label explicitly.
    skill_markdown = "---\nname: legacy-skill\ncategory: front-end\ndescription: Example\n---\nbody\n"

    with tempfile.TemporaryDirectory() as workspace:
        root = pathlib.Path(workspace)
        skills_root = root / "skills"
        index_path = root / "skills_index.json"

        skill_dir = skills_root / "legacy-skill"
        skill_dir.mkdir(parents=True)
        skill_file = skill_dir / "SKILL.md"
        skill_file.write_text(skill_markdown, encoding="utf-8")

        indexed = generate_index.generate_index(str(skills_root), str(index_path))

        self.assertEqual(indexed[0]["category"], "web-development")
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()