meta(index): Normalize legacy catalog categories

This commit is contained in:
sickn33
2026-03-20 09:39:25 +01:00
parent 515423b80d
commit b5405ea324
3 changed files with 86 additions and 29 deletions

View File

@@ -205,6 +205,34 @@ FAMILY_CATEGORY_RULES = [
("terraform-", "devops"),
]
# Legacy/specialized category labels normalized to broader catalog buckets.
# Declared bucket-by-bucket so every label that collapses into the same
# canonical category sits together; the comprehension flattens the rows into
# a plain {legacy_label: canonical_category} dict.
CATEGORY_ALIASES = {
    legacy_label: canonical
    for canonical, legacy_labels in (
        ("ai-ml", ("ai-agents", "voice-agents", "data-ai", "memory")),
        ("backend", ("api-integration", "blockchain")),
        ("web-development", ("front-end", "frontend")),
        (
            "development",
            (
                "app-builder",
                "code",
                "code-quality",
                "development-and-testing",
                "framework",
            ),
        ),
        ("database", ("database-processing",)),
        (
            "productivity",
            (
                "document-processing",
                "spreadsheet-processing",
                "presentation-processing",
                "graphics-processing",
            ),
        ),
        ("data-science", ("data",)),
        ("business", ("marketing",)),
        ("workflow", ("planning", "project-management")),
        ("devops", ("reliability",)),
        ("testing", ("test-automation",)),
    )
    for legacy_label in legacy_labels
}
def tokenize(text):
    """Split *text* into lowercase ASCII alphanumeric tokens.

    The text is lowercased first; every maximal run of the characters
    ``a-z`` and ``0-9`` then becomes one token. Anything else (hyphens,
    underscores, whitespace, punctuation) only separates tokens.

    Returns:
        A list of token strings in order of appearance; empty when the
        text contains no matching characters.
    """
    lowered = text.lower()
    token_pattern = re.compile(r"[a-z0-9]+")
    return token_pattern.findall(lowered)
@@ -254,6 +282,13 @@ def infer_category(skill_id, skill_name, description):
return best_category
def normalize_category(category):
    """Resolve a category label to its canonical catalog name.

    String labels are stripped of surrounding whitespace and lowercased.
    If the cleaned label is a key in ``CATEGORY_ALIASES`` the mapped value
    is returned; otherwise the cleaned label itself is returned.

    Values that are not strings are handed back unchanged.
    """
    if isinstance(category, str):
        cleaned = category.strip().lower()
        return CATEGORY_ALIASES.get(cleaned, cleaned)
    return category
def normalize_yaml_value(value):
if isinstance(value, Mapping):
return {key: normalize_yaml_value(val) for key, val in value.items()}
@@ -359,6 +394,7 @@ def generate_index(skills_dir, output_file):
skill_info["description"],
)
skill_info["category"] = inferred_category or "uncategorized"
skill_info["category"] = normalize_category(skill_info["category"])
# Fallback for description if missing in frontmatter (legacy support)
if not skill_info["description"]:

View File

@@ -20,6 +20,11 @@ generate_index = load_module("tools/scripts/generate_index.py", "generate_index_
class GenerateIndexCategoryTests(unittest.TestCase):
def test_normalize_category_maps_legacy_labels(self):
    """Legacy labels are mapped onto their broader catalog categories."""
    legacy_to_expected = (
        ("front-end", "web-development"),
        ("ai-agents", "ai-ml"),
        ("document-processing", "productivity"),
    )
    # Plain loop (no subTest) so the first mismatch aborts the test.
    for legacy_label, expected in legacy_to_expected:
        self.assertEqual(generate_index.normalize_category(legacy_label), expected)
def test_infer_category_returns_none_for_weak_signal(self):
inferred = generate_index.infer_category(
"mystery-skill",
@@ -94,6 +99,22 @@ class GenerateIndexCategoryTests(unittest.TestCase):
self.assertEqual(categories["nested-skill"], "bundles")
self.assertEqual(categories["playwright-skill"], "testing")
def test_generate_index_normalizes_explicit_legacy_category(self):
    """A legacy category written in SKILL.md frontmatter is normalized in the index."""
    # Frontmatter declares the legacy "front-end" label explicitly.
    skill_markdown = "---\nname: legacy-skill\ncategory: front-end\ndescription: Example\n---\nbody\n"

    with tempfile.TemporaryDirectory() as temp_dir:
        root = pathlib.Path(temp_dir)
        skills_root = root / "skills"
        index_path = root / "skills_index.json"

        skill_dir = skills_root / "legacy-skill"
        skill_dir.mkdir(parents=True)
        skill_file = skill_dir / "SKILL.md"
        skill_file.write_text(skill_markdown, encoding="utf-8")

        indexed = generate_index.generate_index(str(skills_root), str(index_path))

        first_entry = indexed[0]
        self.assertEqual(first_entry["category"], "web-development")
# Run the tests through unittest's command-line entry point when this
# module is executed directly as a script.
if __name__ == "__main__":
unittest.main()