fix(skills): Restore actionable skill descriptions
This commit is contained in:
262
tools/scripts/repair_description_usage_summaries.py
Normal file
262
tools/scripts/repair_description_usage_summaries.py
Normal file
@@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from _project_paths import find_repo_root
|
||||
from fix_truncated_descriptions import (
|
||||
FRONTMATTER_PATTERN,
|
||||
clamp_description,
|
||||
is_usable_paragraph,
|
||||
normalize_for_match,
|
||||
normalize_text,
|
||||
replace_description,
|
||||
split_candidate_paragraphs,
|
||||
strip_frontmatter,
|
||||
)
|
||||
from validate_skills import configure_utf8_output, parse_frontmatter
|
||||
|
||||
|
||||
# Lower-case phrases that mark a description as already saying when the skill
# applies. They are compared against the lower-cased description text.
# NOTE(review): the trailing space in "use for " looks deliberate (presumably
# to avoid hitting words such as "use format") - keep it verbatim.
USAGE_CUE_PATTERNS = (
    "use when",
    "when to use",
    "this skill should be used when",
    "use this skill when",
    "use this when",
    "use it to",
    "use for ",
    "trigger on",
    "triggers on",
)

# Heading titles (lower-cased, trailing colon removed) that open a
# "when to use" section in a skill body.
WHEN_TO_USE_HEADINGS = {
    "when to use",
    "when to apply",
    "when to activate",
}

# One Markdown list item; group 1 is the item text. Accepts the three bullet
# markers ("-", "*", "+") and ordered items delimited by "." or ")".
BULLET_PATTERN = re.compile(r"^(?:[-*+]\s+|\d+[.)]\s+)(.+)$")

# Shortest leading run that ends in ".", "!" or "?" and is followed by
# whitespace or end of text, so the dot inside "Next.js" does not end it.
SENTENCE_PATTERN = re.compile(r"^(.+?[.!?])(?:\s|$)")
|
||||
|
||||
|
||||
def first_usable_paragraph(body: str) -> str | None:
    """Return the first candidate paragraph of ``body`` that is usable.

    Candidates come from ``split_candidate_paragraphs`` and are filtered with
    ``is_usable_paragraph``. Returns ``None`` when no candidate passes.
    """
    for candidate in split_candidate_paragraphs(body):
        if is_usable_paragraph(candidate):
            return candidate
    return None
|
||||
|
||||
|
||||
def has_explicit_usage_cue(description: str) -> bool:
    """Report whether ``description`` already contains a usage cue phrase.

    Each phrase in ``USAGE_CUE_PATTERNS`` is searched case-insensitively and
    must start on a word boundary. A plain substring test would also fire
    inside longer words ("because when", "pause when", "reuse for "), wrongly
    marking those descriptions as already explaining when to use the skill.
    """
    lowered = description.lower()
    return any(
        re.search(rf"\b{re.escape(phrase)}", lowered) is not None
        for phrase in USAGE_CUE_PATTERNS
    )
|
||||
|
||||
|
||||
def mirrors_intro_paragraph(description: str, body: str) -> bool:
    """Tell whether ``description`` just repeats the body's opening paragraph.

    Both texts are compared after ``normalize_for_match``. A body with no
    usable paragraph never counts as mirrored.
    """
    opening = first_usable_paragraph(body)
    return bool(opening) and normalize_for_match(description) == normalize_for_match(opening)
|
||||
|
||||
|
||||
def extract_when_to_use_lines(body: str) -> list[str]:
    """Collect the raw lines under the first "when to use" style heading.

    A heading is a line (after stripping) of two to six ``#`` characters
    followed by whitespace. Its title is normalized, lower-cased and stripped
    of trailing colons before lookup in ``WHEN_TO_USE_HEADINGS``. Capture
    starts after the first matching heading and stops at the next heading of
    any level. Lines are returned unmodified; the list is empty when no such
    section exists.
    """
    section: list[str] = []
    inside = False

    for line in body.splitlines():
        heading = re.match(r"^(#{2,6})\s+(.*)$", line.strip())
        if heading is None:
            if inside:
                section.append(line)
            continue

        if inside:
            # Any further heading closes the section being captured.
            break

        title = normalize_text(heading.group(2)).lower().rstrip(":")
        inside = title in WHEN_TO_USE_HEADINGS

    return section
|
||||
|
||||
|
||||
def lower_first_fragment(text: str) -> str:
    """Lower-case the first letter of ``text`` so it can follow "Use when ".

    The first character is lowered only when it starts an ordinary capitalized
    word, i.e. the next character is lower-case ("Writing" -> "writing").
    Acronyms and similar tokens ("API", "X-ray") are left alone.

    A leading article "A" followed by whitespace is also lowered; it has no
    lower-case second letter, so it would otherwise yield "Use when A new ...".
    The pronoun "I" is deliberately kept upper-case.

    Returns ``text`` unchanged when empty or when neither rule applies.
    """
    if not text:
        return text

    head, tail = text[0], text[1:]
    if not head.isupper():
        return text

    follower = tail[:1]  # "" for one-character input; both checks are then False
    starts_ordinary_word = follower.islower()
    is_leading_article = head == "A" and follower.isspace()
    if starts_ordinary_word or is_leading_article:
        return head.lower() + tail
    return text
|
||||
|
||||
|
||||
def extract_usage_items(section_lines: list[str]) -> list[str]:
    """Turn the bullet lines of a usage section into short sentence fragments.

    Only lines matching ``BULLET_PATTERN`` contribute. For each bullet the
    text is normalized, trailing ":" / "." characters are dropped, anything
    after the first remaining colon is cut, a leading "when" / "whenever" is
    removed, and the first letter is lowered via ``lower_first_fragment``.
    Bullets that end up empty are skipped.
    """
    fragments: list[str] = []

    for line in section_lines:
        bullet = BULLET_PATTERN.match(line.strip())
        if bullet is None:
            # Blank lines and non-list text carry no usage item.
            continue

        fragment = normalize_text(bullet.group(1)).rstrip(":.")
        label, colon, _ = fragment.partition(":")
        if colon:
            # "Label: explanation" bullets keep only the label.
            fragment = label.strip()

        fragment = re.sub(r"^(?:when|whenever)\s+", "", fragment, flags=re.IGNORECASE)
        if fragment:
            fragments.append(lower_first_fragment(fragment))

    return fragments
|
||||
|
||||
|
||||
def build_usage_sentence(section_lines: list[str]) -> str | None:
    """Compose a "Use when ..." sentence from up to three usage items.

    Items come from ``extract_usage_items``; only the first three are used.
    Two items are joined with "or", three with commas and a final ", or".
    Returns ``None`` when the section yields no items.
    """
    leading = extract_usage_items(section_lines)[:3]
    if not leading:
        return None

    if len(leading) == 1:
        (only,) = leading
        return f"Use when {only}."

    if len(leading) == 2:
        first, second = leading
        return f"Use when {first} or {second}."

    first, second, third = leading
    return f"Use when {first}, {second}, or {third}."
|
||||
|
||||
|
||||
def first_sentence(text: str) -> str:
    """Return the first sentence of ``text`` after normalization.

    The sentence is whatever ``SENTENCE_PATTERN`` captures at the start of the
    normalized text. When the pattern does not match, the whole normalized
    text is returned.
    """
    cleaned = normalize_text(text)
    hit = SENTENCE_PATTERN.match(cleaned)
    return hit.group(1).strip() if hit else cleaned
|
||||
|
||||
|
||||
def is_substantial_capability(text: str) -> bool:
    """Decide whether ``text`` is meaty enough to describe a capability.

    The text qualifies when it has at least six runs of ASCII letters or
    digits and is not wholly wrapped in parentheses (a label, not a sentence).
    """
    trimmed = text.strip()
    if trimmed.startswith("(") and trimmed.endswith(")"):
        return False
    return len(re.findall(r"[A-Za-z0-9]+", text)) >= 6
|
||||
|
||||
|
||||
def select_capability_sentence(description: str, body: str) -> str:
    """Pick the sentence that states what the skill does.

    The first sentence of ``description`` wins when it is substantial.
    Otherwise the body's usable paragraphs are scanned in order, skipping any
    that normalize to the same text as the description, and the first
    substantial opening sentence is returned. If none qualifies, the
    description's first sentence is returned anyway.
    """
    fallback = first_sentence(description)
    if is_substantial_capability(fallback):
        return fallback

    own_key = normalize_for_match(description)
    for block in split_candidate_paragraphs(body):
        if not is_usable_paragraph(block) or normalize_for_match(block) == own_key:
            continue

        sentence = first_sentence(block)
        if is_substantial_capability(sentence):
            return sentence

    return fallback
|
||||
|
||||
|
||||
def ensure_terminal_punctuation(text: str) -> str:
    """Return ``text`` without trailing whitespace, ending like a sentence.

    * Empty or whitespace-only input yields ``""``.
    * Text ending in ".", "!", "?" or an ellipsis is returned as is, also
      when that mark sits inside closing quotes or brackets (``"stop."``),
      so no second period is appended after the quote.
    * Dangling ",", ";" or ":" is replaced by the period instead of being
      kept in front of it ("following:" -> "following.").
    * Anything else gets a single "." appended.
    """
    stripped = text.rstrip()
    if not stripped:
        return stripped

    terminators = (".", "!", "?", "\u2026")
    # Look through closing quotes/brackets for the real sentence end.
    if stripped.rstrip("\"')]}\u201d\u2019").endswith(terminators):
        return stripped

    # Drop dangling separators; fall back to the original text if that would
    # leave nothing to punctuate.
    trimmed = stripped.rstrip(",;:").rstrip()
    return f"{trimmed or stripped}."
|
||||
|
||||
|
||||
def build_repaired_description(description: str, body: str) -> str | None:
    """Build a description that adds when-to-use guidance, or ``None``.

    A repair is produced only when the description has no explicit usage cue,
    mirrors the body's intro paragraph, and the body has a usage section with
    at least one bullet. The result is a capability sentence followed by a
    "Use when ..." sentence, passed through ``clamp_description``. ``None`` is
    also returned when the result normalizes to the same text as the
    existing description.
    """
    if has_explicit_usage_cue(description) or not mirrors_intro_paragraph(description, body):
        return None

    usage = build_usage_sentence(extract_when_to_use_lines(body))
    if not usage:
        return None

    capability = ensure_terminal_punctuation(select_capability_sentence(description, body))
    repaired = clamp_description(f"{capability} {usage}")
    return None if normalize_text(repaired) == normalize_text(description) else repaired
|
||||
|
||||
|
||||
def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    """Rewrite the ``description`` in one skill file when a repair applies.

    Args:
        skill_path: Path of the skill Markdown file, read and written as UTF-8.

    Returns:
        ``(True, new_description)`` when the file was rewritten, otherwise
        ``(False, None)``: no frontmatter match, no parsed metadata, a
        non-string description, no repair, or unchanged content.
    """
    content = skill_path.read_text(encoding="utf-8")
    match = FRONTMATTER_PATTERN.search(content)
    if not match:
        return False, None

    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, None

    description = metadata.get("description")
    if not isinstance(description, str):
        return False, None

    new_description = build_repaired_description(description, strip_frontmatter(content))
    if not new_description:
        return False, None

    updated_frontmatter = replace_description(match.group(1), new_description)
    # Keep whatever precedes the match: ``search`` is not guaranteed by this
    # file to match at offset 0, and rebuilding from the match alone would
    # silently drop leading text. With an anchored pattern the prefix is "".
    # NOTE(review): FRONTMATTER_PATTERN lives in fix_truncated_descriptions -
    # confirm whether it can match past the start of the file.
    updated_content = (
        f"{content[:match.start()]}---\n{updated_frontmatter}\n---{content[match.end():]}"
    )
    if updated_content == content:
        return False, None

    skill_path.write_text(updated_content, encoding="utf-8")
    return True, new_description
|
||||
|
||||
|
||||
def main() -> int:
    """Repair skill descriptions under ``<repo root>/skills``.

    Walks the skills directory, skipping dot-directories, and looks at every
    directory that contains a ``SKILL.md``. With ``--dry-run`` each planned
    fix is printed with its new description and nothing is written. Otherwise
    ``update_skill_file`` is called and the file is reported as FIX or SKIP.
    Totals are printed at the end.

    Returns:
        Always ``0``.
    """
    configure_utf8_output()

    parser = argparse.ArgumentParser(
        description="Repair synthetic descriptions by adding concise when-to-use guidance.",
    )
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"

    fixed = 0
    skipped = 0
    for root, dirs, files in os.walk(skills_dir):
        # Prune hidden directories in place so os.walk does not descend into
        # them, and sort the rest: os.walk yields entries in arbitrary order,
        # which would make the report order differ between runs and machines.
        dirs[:] = sorted(directory for directory in dirs if not directory.startswith("."))
        if "SKILL.md" not in files:
            continue

        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        if not isinstance(description, str):
            continue

        new_description = build_repaired_description(description, strip_frontmatter(content))
        if not new_description:
            continue

        if args.dry_run:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}")
            continue

        changed, _ = update_skill_file(skill_path)
        if changed:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)}")
        else:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")

    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -25,6 +25,7 @@ const LOCAL_TEST_COMMANDS = [
|
||||
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_missing_skill_sections.py")],
|
||||
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_truncated_descriptions.py")],
|
||||
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_generate_index_categories.py")],
|
||||
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_repair_description_usage_summaries.py")],
|
||||
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_sync_microsoft_skills_security.py")],
|
||||
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_validate_skills_headings.py")],
|
||||
];
|
||||
|
||||
118
tools/scripts/tests/test_repair_description_usage_summaries.py
Normal file
118
tools/scripts/tests/test_repair_description_usage_summaries.py
Normal file
@@ -0,0 +1,118 @@
|
||||
import importlib.util
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# This file lives at tools/scripts/tests/, so three levels above its directory
# is the repository root.
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
# Make the script directory importable so the module under test can resolve
# its own sibling imports.
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))


def load_module(relative_path: str, module_name: str):
    """Load a Python source file as a module and register it in ``sys.modules``.

    Args:
        relative_path: Path of the source file, relative to ``REPO_ROOT``.
        module_name: Name to give the module and to register it under.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for the file.
    """
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # Validate before use: spec_from_file_location may return None, and an
    # ``assert`` placed after module_from_spec() would never see that case
    # (and disappears entirely under ``python -O``).
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {module_name!r} from {module_path}")

    module = importlib.util.module_from_spec(spec)
    # Register before executing so imports of module_name during execution
    # resolve to this module object.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind for later imports.
        sys.modules.pop(module_name, None)
        raise
    return module


repair_descriptions = load_module(
    "tools/scripts/repair_description_usage_summaries.py",
    "repair_description_usage_summaries",
)
|
||||
|
||||
|
||||
class RepairDescriptionUsageSummariesTests(unittest.TestCase):
    # Covers build_repaired_description (pure text) and update_skill_file
    # (round trip through a temporary SKILL.md).

    def test_build_repaired_description_adds_usage_summary(self):
        # A description that mirrors the intro paragraph keeps its first
        # sentence and gains a "Use when ..." sentence built from the bullets.
        mirrored = (
            "Comprehensive performance optimization guide for React and Next.js applications, "
            "maintained by Vercel. Contains 45 rules across 8 categories."
        )
        markdown = """
# Vercel React Best Practices

Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Contains 45 rules across 8 categories.

## When to Use

- Writing new React components or Next.js pages
- Reviewing code for performance issues
- Refactoring existing React/Next.js code
"""
        expected = (
            "Comprehensive performance optimization guide for React and Next.js applications, "
            "maintained by Vercel. "
            "Use when writing new React components or Next.js pages, "
            "reviewing code for performance issues, "
            "or refactoring existing React/Next.js code."
        )

        outcome = repair_descriptions.build_repaired_description(mirrored, markdown)

        self.assertEqual(outcome, expected)

    def test_build_repaired_description_skips_explicit_usage_descriptions(self):
        # A description that already says "Use when" must be left alone.
        already_actionable = "Optimize React apps. Use when writing or reviewing React and Next.js code."
        markdown = """
# Skill

Optimize React apps.

## When to Use
- Writing React code
"""

        outcome = repair_descriptions.build_repaired_description(already_actionable, markdown)

        self.assertIsNone(outcome)

    def test_build_repaired_description_uses_body_sentence_when_description_is_label(self):
        # A parenthesised label is not a capability sentence, so the first
        # substantial body sentence is used instead.
        label = "(React · TypeScript · Suspense-First · Production-Grade)"
        markdown = """
# Frontend Development Guidelines

(React · TypeScript · Suspense-First · Production-Grade)

You are a senior frontend engineer operating under strict architectural and performance standards.

## When to Use
- Creating components or pages
- Adding new features
- Fetching or mutating data
"""
        expected = (
            "You are a senior frontend engineer operating under strict architectural "
            "and performance standards. "
            "Use when creating components or pages, adding new features, "
            "or fetching or mutating data."
        )

        outcome = repair_descriptions.build_repaired_description(label, markdown)

        self.assertEqual(outcome, expected)

    def test_update_skill_file_rewrites_mirrored_description(self):
        # End to end: the frontmatter description on disk is replaced.
        original_file = """---
name: react-best-practices
description: "Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Contains 45 rules across 8 categories."
risk: unknown
source: community
---

# Vercel React Best Practices

Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Contains 45 rules across 8 categories.

## When to Use
- Writing new React components or Next.js pages
- Reviewing code for performance issues
- Refactoring existing React/Next.js code
"""
        expected_line = (
            'description: "Comprehensive performance optimization guide for React and Next.js '
            "applications, maintained by Vercel. Use when writing new React components or "
            "Next.js pages, reviewing code for performance issues, or refactoring existing "
            'React/Next.js code."'
        )

        with tempfile.TemporaryDirectory() as scratch:
            target = Path(scratch) / "SKILL.md"
            target.write_text(original_file, encoding="utf-8")

            was_changed, written_description = repair_descriptions.update_skill_file(target)

            self.assertTrue(was_changed)
            self.assertIn("Use when writing new React components", written_description)
            on_disk = target.read_text(encoding="utf-8")
            self.assertIn(expected_line, on_disk)


if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user