meta(index): Normalize legacy catalog categories
This commit is contained in:
@@ -205,6 +205,34 @@ FAMILY_CATEGORY_RULES = [
|
||||
("terraform-", "devops"),
|
||||
]
|
||||
|
||||
# Maps legacy or overly specific category labels to the broader catalog
# bucket they are normalized to. Keys are lowercase, hyphenated labels;
# no value in this table is itself a key, so one lookup is enough.
CATEGORY_ALIASES = {
    # Legacy/specialized labels normalized to broader catalog buckets
    # -> ai-ml
    "ai-agents": "ai-ml",
    "voice-agents": "ai-ml",
    "data-ai": "ai-ml",
    "memory": "ai-ml",
    # -> backend
    "api-integration": "backend",
    "blockchain": "backend",
    # -> web-development
    "front-end": "web-development",
    "frontend": "web-development",
    # -> development
    "app-builder": "development",
    "code": "development",
    "code-quality": "development",
    "development-and-testing": "development",
    "framework": "development",
    # -> database
    "database-processing": "database",
    # -> productivity
    "document-processing": "productivity",
    "spreadsheet-processing": "productivity",
    "presentation-processing": "productivity",
    "graphics-processing": "productivity",
    # -> data-science
    "data": "data-science",
    # -> business
    "marketing": "business",
    # -> workflow
    "planning": "workflow",
    "project-management": "workflow",
    # -> devops
    "reliability": "devops",
    # -> testing
    "test-automation": "testing",
}
|
||||
|
||||
|
||||
def tokenize(text):
    """Split *text* into lowercase alphanumeric tokens.

    The text is lowercased, then every maximal run of ASCII letters or
    digits becomes one token; all other characters act as separators.

    Args:
        text: The string to tokenize. ``None`` is treated as empty text.

    Returns:
        A list of token strings, in order of appearance. Empty when the
        input is ``None`` or contains no ASCII letters or digits.
    """
    # A missing value has no tokens; avoid AttributeError on None.lower().
    if text is None:
        return []
    return re.findall(r"[a-z0-9]+", text.lower())
|
||||
@@ -254,6 +282,13 @@ def infer_category(skill_id, skill_name, description):
|
||||
|
||||
return best_category
|
||||
|
||||
|
||||
def normalize_category(category):
    """Map a category label onto its canonical catalog bucket.

    String labels are stripped of surrounding whitespace and lowercased,
    then looked up in ``CATEGORY_ALIASES``; labels without an alias are
    returned in their stripped, lowercased form. Non-string values are
    returned unchanged.
    """
    if isinstance(category, str):
        label = category.strip().lower()
        return CATEGORY_ALIASES.get(label, label)
    return category
|
||||
|
||||
def normalize_yaml_value(value):
|
||||
if isinstance(value, Mapping):
|
||||
return {key: normalize_yaml_value(val) for key, val in value.items()}
|
||||
@@ -359,6 +394,7 @@ def generate_index(skills_dir, output_file):
|
||||
skill_info["description"],
|
||||
)
|
||||
skill_info["category"] = inferred_category or "uncategorized"
|
||||
skill_info["category"] = normalize_category(skill_info["category"])
|
||||
|
||||
# Fallback for description if missing in frontmatter (legacy support)
|
||||
if not skill_info["description"]:
|
||||
|
||||
@@ -20,6 +20,11 @@ generate_index = load_module("tools/scripts/generate_index.py", "generate_index_
|
||||
|
||||
|
||||
class GenerateIndexCategoryTests(unittest.TestCase):
|
||||
def test_normalize_category_maps_legacy_labels(self):
    """Legacy labels are rewritten to their broader catalog buckets."""
    expectations = (
        ("front-end", "web-development"),
        ("ai-agents", "ai-ml"),
        ("document-processing", "productivity"),
    )
    for legacy_label, bucket in expectations:
        self.assertEqual(generate_index.normalize_category(legacy_label), bucket)
|
||||
|
||||
def test_infer_category_returns_none_for_weak_signal(self):
|
||||
inferred = generate_index.infer_category(
|
||||
"mystery-skill",
|
||||
@@ -94,6 +99,22 @@ class GenerateIndexCategoryTests(unittest.TestCase):
|
||||
self.assertEqual(categories["nested-skill"], "bundles")
|
||||
self.assertEqual(categories["playwright-skill"], "testing")
|
||||
|
||||
def test_generate_index_normalizes_explicit_legacy_category(self):
    """A legacy ``category`` declared in frontmatter is normalized in the index."""
    skill_markdown = "---\nname: legacy-skill\ncategory: front-end\ndescription: Example\n---\nbody\n"

    with tempfile.TemporaryDirectory() as temp_dir:
        root = pathlib.Path(temp_dir)
        skills_root = root / "skills"
        index_path = root / "skills_index.json"

        # Single skill whose frontmatter uses the legacy "front-end" label.
        skill_folder = skills_root / "legacy-skill"
        skill_folder.mkdir(parents=True)
        skill_file = skill_folder / "SKILL.md"
        skill_file.write_text(skill_markdown, encoding="utf-8")

        entries = generate_index.generate_index(str(skills_root), str(index_path))
        self.assertEqual(entries[0]["category"], "web-development")
|
||||
|
||||
|
||||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user