fix(skills): Restore actionable skill descriptions

This commit is contained in:
sickn33
2026-03-20 17:56:13 +01:00
parent 5e6076245f
commit 86e9b55efb
34 changed files with 639 additions and 185 deletions

View File

@@ -0,0 +1,262 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import sys
from pathlib import Path
from _project_paths import find_repo_root
from fix_truncated_descriptions import (
FRONTMATTER_PATTERN,
clamp_description,
is_usable_paragraph,
normalize_for_match,
normalize_text,
replace_description,
split_candidate_paragraphs,
strip_frontmatter,
)
from validate_skills import configure_utf8_output, parse_frontmatter
# Lowercase phrases whose presence in a description (compared against the
# lowercased description text) marks it as already stating when to use the skill.
USAGE_CUE_PATTERNS = (
    "use when",
    "when to use",
    "this skill should be used when",
    "use this skill when",
    "use this when",
    "use it to",
    "use for ",
    "trigger on",
    "triggers on",
)

# Heading titles (compared after normalization, lowercasing, and removal of a
# trailing colon) that open a "when to use" section in a skill body.
WHEN_TO_USE_HEADINGS = {
    "when to use",
    "when to apply",
    "when to activate",
}

# A list item introduced by "-", "*", or "<digits>."; group 1 is the item text.
BULLET_PATTERN = re.compile(r"^(?:[-*]\s+|\d+\.\s+)(.+)$")

# The shortest leading run of text that ends in ".", "!" or "?" and is followed
# by whitespace or the end of the string; group 1 is that run.
SENTENCE_PATTERN = re.compile(r"^(.+?[.!?])(?:\s|$)")
def first_usable_paragraph(body: str) -> str | None:
    """Return the first candidate paragraph of *body* that is usable.

    Candidates come from ``split_candidate_paragraphs`` and are kept when
    ``is_usable_paragraph`` accepts them. Returns ``None`` when no candidate
    qualifies.
    """
    usable = filter(is_usable_paragraph, split_candidate_paragraphs(body))
    return next(usable, None)
def has_explicit_usage_cue(description: str) -> bool:
    """Report whether *description* contains any phrase from ``USAGE_CUE_PATTERNS``.

    The comparison is done on the lowercased description.
    """
    haystack = description.lower()
    for cue in USAGE_CUE_PATTERNS:
        if cue in haystack:
            return True
    return False
def mirrors_intro_paragraph(description: str, body: str) -> bool:
    """Report whether *description* equals the body's first usable paragraph.

    Both sides are passed through ``normalize_for_match`` before comparing.
    Returns ``False`` when the body has no usable paragraph.
    """
    intro = first_usable_paragraph(body)
    return bool(intro) and normalize_for_match(description) == normalize_for_match(intro)
def extract_when_to_use_lines(body: str) -> list[str]:
    """Collect the raw lines of the first "when to use" section in *body*.

    A section starts at a level 2-6 markdown heading whose normalized,
    lowercased title (trailing colon removed) is in ``WHEN_TO_USE_HEADINGS``
    and ends at the next level 2-6 heading. The heading lines themselves are
    not included. Returns an empty list when no such section exists.
    """
    section: list[str] = []
    inside = False
    for line in body.splitlines():
        found = re.match(r"^(#{2,6})\s+(.*)$", line.strip())
        if found is None:
            # Ordinary line: keep it only while inside the target section.
            if inside:
                section.append(line)
            continue
        title = normalize_text(found.group(2)).lower().rstrip(":")
        if inside:
            # Any further heading closes the section; only the first one is used.
            break
        inside = title in WHEN_TO_USE_HEADINGS
    return section
def lower_first_fragment(text: str) -> str:
    """Lowercase the first character of *text* when it starts an ordinary word.

    The change is applied only when the first character is uppercase and the
    second is lowercase, so strings such as "API design", single characters,
    and the empty string are returned unchanged.
    """
    head, tail = text[:1], text[1:]
    if head.isupper() and tail[:1].islower():
        return head.lower() + tail
    return text
def extract_usage_items(section_lines: list[str]) -> list[str]:
    """Turn the bullet lines of a section into short usage phrases.

    Only lines matching ``BULLET_PATTERN`` contribute. For each bullet the
    text is normalized, trailing ":" / "." characters are removed, everything
    from the first remaining colon onward is dropped, a leading "when" or
    "whenever" is removed, and the result is passed through
    ``lower_first_fragment``. Bullets that end up empty are skipped.
    """
    collected: list[str] = []
    for line in section_lines:
        # Blank lines never match the bullet pattern, so they fall out here too.
        bullet = BULLET_PATTERN.match(line.strip())
        if bullet is None:
            continue
        phrase = normalize_text(bullet.group(1)).rstrip(":.")
        label, colon, _ = phrase.partition(":")
        if colon:
            # Keep only the text before the first colon.
            phrase = label.strip()
        phrase = re.sub(r"^(?:when|whenever)\s+", "", phrase, flags=re.IGNORECASE)
        if phrase:
            collected.append(lower_first_fragment(phrase))
    return collected
def build_usage_sentence(section_lines: list[str]) -> str | None:
    """Build a "Use when ..." sentence from at most three usage items.

    Items come from ``extract_usage_items``. Returns ``None`` when the section
    yields no items.
    """
    items = extract_usage_items(section_lines)[:3]
    if not items:
        return None
    count = len(items)
    if count >= 3:
        return f"Use when {items[0]}, {items[1]}, or {items[2]}."
    if count == 2:
        return f"Use when {items[0]} or {items[1]}."
    return f"Use when {items[0]}."
def first_sentence(text: str) -> str:
    """Return the leading sentence of *text* after normalization.

    The sentence is whatever ``SENTENCE_PATTERN`` captures at the start of the
    normalized text; when the pattern does not match, the whole normalized
    text is returned.
    """
    normalized = normalize_text(text)
    found = SENTENCE_PATTERN.match(normalized)
    return found.group(1).strip() if found else normalized
def is_substantial_capability(text: str) -> bool:
    """Report whether *text* is long enough to serve as a capability sentence.

    Text qualifies when it contains at least six ASCII alphanumeric runs and
    is not wrapped entirely in parentheses (ignoring surrounding whitespace).
    """
    trimmed = text.strip()
    if trimmed.startswith("(") and trimmed.endswith(")"):
        # A fully parenthesized string is treated as a label, not a sentence.
        return False
    return len(re.findall(r"[A-Za-z0-9]+", text)) >= 6
def select_capability_sentence(description: str, body: str) -> str:
    """Choose the sentence that states what the skill does.

    The first sentence of *description* is used when it passes
    ``is_substantial_capability``. Otherwise the first substantial leading
    sentence among the usable body paragraphs is used, skipping any paragraph
    that matches the description after ``normalize_for_match``. When nothing
    qualifies, the description's first sentence is returned anyway.
    """
    description_sentence = first_sentence(description)
    if is_substantial_capability(description_sentence):
        return description_sentence

    description_key = normalize_for_match(description)
    body_sentences = (
        first_sentence(paragraph)
        for paragraph in split_candidate_paragraphs(body)
        if is_usable_paragraph(paragraph)
        and normalize_for_match(paragraph) != description_key
    )
    return next(filter(is_substantial_capability, body_sentences), description_sentence)
def ensure_terminal_punctuation(text: str) -> str:
    """Return *text* without trailing whitespace, ending in ".", "!" or "?".

    A period is appended when the trimmed text is non-empty and does not
    already end in one of those characters. Empty or whitespace-only input
    yields the empty string.
    """
    stripped = text.rstrip()
    needs_period = bool(stripped) and stripped[-1] not in ".!?"
    return f"{stripped}." if needs_period else stripped
def build_repaired_description(description: str, body: str) -> str | None:
    """Build a replacement description that adds when-to-use guidance.

    Returns ``None`` (meaning "leave the description alone") when any of the
    following holds:

    * the description already contains an explicit usage cue;
    * the description does not mirror the body's intro paragraph;
    * the body yields no usage sentence from a "when to use" section;
    * the rebuilt description equals the current one after normalization.

    Otherwise returns the capability sentence followed by the usage sentence,
    passed through ``clamp_description``.
    """
    if has_explicit_usage_cue(description) or not mirrors_intro_paragraph(description, body):
        return None

    usage_sentence = build_usage_sentence(extract_when_to_use_lines(body))
    if not usage_sentence:
        return None

    capability_sentence = ensure_terminal_punctuation(select_capability_sentence(description, body))
    candidate = clamp_description(f"{capability_sentence} {usage_sentence}")
    unchanged = normalize_text(candidate) == normalize_text(description)
    return None if unchanged else candidate
def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    """Rewrite the description in a skill file's frontmatter when it needs repair.

    Args:
        skill_path: Path of the ``SKILL.md`` file to inspect and possibly rewrite.

    Returns:
        ``(True, new_description)`` when the file was rewritten, otherwise
        ``(False, None)``. Nothing is written when the file has no frontmatter
        match, the parsed metadata is empty, the description is not a string,
        ``build_repaired_description`` declines to repair it, or the rebuilt
        content is identical to the current content.
    """
    content = skill_path.read_text(encoding="utf-8")
    match = FRONTMATTER_PATTERN.search(content)
    if not match:
        return False, None

    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, None

    description = metadata.get("description")
    if not isinstance(description, str):
        return False, None

    new_description = build_repaired_description(description, strip_frontmatter(content))
    if not new_description:
        return False, None

    updated_frontmatter = replace_description(match.group(1), new_description)
    # The frontmatter is located with search(), so the match is not guaranteed
    # to begin at offset 0. Carry over whatever precedes it instead of
    # discarding it; this is an empty string when the match starts the file.
    preamble = content[:match.start()]
    updated_content = f"{preamble}---\n{updated_frontmatter}\n---{content[match.end():]}"
    if updated_content == content:
        return False, None

    skill_path.write_text(updated_content, encoding="utf-8")
    return True, new_description
def main() -> int:
    """Repair skill descriptions under ``<repo root>/skills`` and print a report.

    Walks the skills directory, skipping directories whose name starts with
    ".", and considers every ``SKILL.md`` found. With ``--dry-run`` each
    planned fix is printed together with the new description and no file is
    written; otherwise the file is rewritten through ``update_skill_file``.

    Returns:
        Always ``0``.
    """
    configure_utf8_output()
    parser = argparse.ArgumentParser(
        description="Repair synthetic descriptions by adding concise when-to-use guidance.",
    )
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"
    fixed = skipped = 0

    for root, dirs, files in os.walk(skills_dir):
        # Prune in place so os.walk does not descend into hidden directories.
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue

        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        if not isinstance(description, str):
            continue

        new_description = build_repaired_description(description, strip_frontmatter(content))
        if not new_description:
            continue

        if args.dry_run:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}")
        elif update_skill_file(skill_path)[0]:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)}")
        else:
            # A repair was planned but update_skill_file wrote nothing.
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")

    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -25,6 +25,7 @@ const LOCAL_TEST_COMMANDS = [
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_missing_skill_sections.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_truncated_descriptions.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_generate_index_categories.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_repair_description_usage_summaries.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_sync_microsoft_skills_security.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_validate_skills_headings.py")],
];

View File

@@ -0,0 +1,118 @@
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path
# parents[3] is the fourth ancestor directory of this test file; it is used as
# the repository root for every path below.
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT.joinpath("tools", "scripts")

# Put the scripts directory first on sys.path (once) so modules located there
# can be imported by bare name.
if sys.path.count(str(TOOLS_SCRIPTS_DIR)) == 0:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))
def load_module(relative_path: str, module_name: str):
    """Load a Python source file as a module and register it in ``sys.modules``.

    Args:
        relative_path: Path of the source file, relative to ``REPO_ROOT``.
        module_name: Name under which the module is created and registered.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for the path.
    """
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # Validate before module_from_spec(), which already dereferences the spec;
    # spec_from_file_location() may return None.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {module_name!r} from {module_path}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing so the module can be found by name while its
    # own top-level code runs.
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module
# Load the script under test directly from its file path and register it under
# its own module name; the tests below reach its functions through this object.
repair_descriptions = load_module(
"tools/scripts/repair_description_usage_summaries.py",
"repair_description_usage_summaries",
)
class RepairDescriptionUsageSummariesTests(unittest.TestCase):
    """Tests for ``build_repaired_description`` and ``update_skill_file``."""

    def test_build_repaired_description_adds_usage_summary(self):
        # The description equals the body text before the "When to Use"
        # heading, so it is rebuilt as: first sentence + "Use when ..." made
        # from the three bullets.
        description = (
            "Comprehensive performance optimization guide for React and Next.js applications, "
            "maintained by Vercel. Contains 45 rules across 8 categories."
        )
        body = """
# Vercel React Best Practices
Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Contains 45 rules across 8 categories.
## When to Use
- Writing new React components or Next.js pages
- Reviewing code for performance issues
- Refactoring existing React/Next.js code
"""
        expected = "Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Use when writing new React components or Next.js pages, reviewing code for performance issues, or refactoring existing React/Next.js code."

        result = repair_descriptions.build_repaired_description(description, body)

        self.assertEqual(result, expected)

    def test_build_repaired_description_skips_explicit_usage_descriptions(self):
        # A description that already contains "Use when" must be left alone.
        description = "Optimize React apps. Use when writing or reviewing React and Next.js code."
        body = """
# Skill
Optimize React apps.
## When to Use
- Writing React code
"""

        result = repair_descriptions.build_repaired_description(description, body)

        self.assertIsNone(result)

    def test_build_repaired_description_uses_body_sentence_when_description_is_label(self):
        # The description is a parenthesized label, so the capability sentence
        # is expected to come from the body instead.
        description = "(React · TypeScript · Suspense-First · Production-Grade)"
        body = """
# Frontend Development Guidelines
(React · TypeScript · Suspense-First · Production-Grade)
You are a senior frontend engineer operating under strict architectural and performance standards.
## When to Use
- Creating components or pages
- Adding new features
- Fetching or mutating data
"""
        expected = "You are a senior frontend engineer operating under strict architectural and performance standards. Use when creating components or pages, adding new features, or fetching or mutating data."

        result = repair_descriptions.build_repaired_description(description, body)

        self.assertEqual(result, expected)

    def test_update_skill_file_rewrites_mirrored_description(self):
        # End-to-end: the file on disk must contain the repaired description
        # inside its frontmatter after the update.
        skill_text = """---
name: react-best-practices
description: "Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Contains 45 rules across 8 categories."
risk: unknown
source: community
---
# Vercel React Best Practices
Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Contains 45 rules across 8 categories.
## When to Use
- Writing new React components or Next.js pages
- Reviewing code for performance issues
- Refactoring existing React/Next.js code
"""
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_path = Path(temp_dir) / "SKILL.md"
            skill_path.write_text(skill_text, encoding="utf-8")

            changed, new_description = repair_descriptions.update_skill_file(skill_path)

            self.assertTrue(changed)
            self.assertIn("Use when writing new React components", new_description)
            updated = skill_path.read_text(encoding="utf-8")
            self.assertIn('description: "Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Use when writing new React components or Next.js pages, reviewing code for performance issues, or refactoring existing React/Next.js code."', updated)


if __name__ == "__main__":
    unittest.main()