#!/usr/bin/env python3
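"""Repair SKILL.md descriptions that merely restate the intro paragraph.

For each skills/*/SKILL.md whose frontmatter description mirrors the body's
opening paragraph and lacks an explicit usage cue, append a concise
"Use when ..." sentence built from the document's "When to use" section.
Pass --dry-run to preview the planned fixes without writing files.
"""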
from __future__ import annotations

import argparse
import os
import re
import sys
from pathlib import Path

from _project_paths import find_repo_root
from fix_truncated_descriptions import (
    FRONTMATTER_PATTERN,
    clamp_description,
    is_usable_paragraph,
    normalize_for_match,
    normalize_text,
    replace_description,
    split_candidate_paragraphs,
    strip_frontmatter,
)
from validate_skills import configure_utf8_output, parse_frontmatter


USAGE_CUE_PATTERNS = (
    "use when",
    "when to use",
    "this skill should be used when",
    "use this skill when",
    "use this when",
    "use it to",
    "use for ",
    "trigger on",
    "triggers on",
)
WHEN_TO_USE_HEADINGS = {
    "when to use",
    "when to apply",
    "when to activate",
}
BULLET_PATTERN = re.compile(r"^(?:[-*]\s+|\d+\.\s+)(.+)$")
SENTENCE_PATTERN = re.compile(r"^(.+?[.!?])(?:\s|$)")


def first_usable_paragraph(body: str) -> str | None:
    """Return the first paragraph of the body that passes is_usable_paragraph."""
    paragraphs = [
        paragraph
        for paragraph in split_candidate_paragraphs(body)
        if is_usable_paragraph(paragraph)
    ]
    return paragraphs[0] if paragraphs else None


def has_explicit_usage_cue(description: str) -> bool:
    """Check whether the description already carries a when-to-use phrase."""
    lower = description.lower()
    return any(phrase in lower for phrase in USAGE_CUE_PATTERNS)


def mirrors_intro_paragraph(description: str, body: str) -> bool:
    """Check whether the description is just a copy of the body's opening paragraph."""
    intro = first_usable_paragraph(body)
    if not intro:
        return False
    return normalize_for_match(description) == normalize_for_match(intro)


def extract_when_to_use_lines(body: str) -> list[str]:
    """Return the lines under the first "When to use"-style heading, if any."""
    lines = body.splitlines()
    capturing = False
    captured: list[str] = []

    for raw_line in lines:
        stripped = raw_line.strip()
        heading_match = re.match(r"^(#{2,6})\s+(.*)$", stripped)
        if heading_match:
            heading = normalize_text(heading_match.group(2)).lower().rstrip(":")
            if capturing:
                break
            capturing = heading in WHEN_TO_USE_HEADINGS
            continue

        if capturing:
            captured.append(raw_line)

    return captured


def lower_first_fragment(text: str) -> str:
    """Lower-case a leading capital so the fragment reads mid-sentence; leave all-caps words unchanged."""
    if not text:
        return text
    first = text[0]
    second = text[1] if len(text) > 1 else ""
    if first.isupper() and second.islower():
        return first.lower() + text[1:]
    return text


def extract_usage_items(section_lines: list[str]) -> list[str]:
    """Convert bullet or numbered lines into short trigger phrases."""
    items: list[str] = []
    for raw_line in section_lines:
        stripped = raw_line.strip()
        if not stripped:
            continue

        bullet_match = BULLET_PATTERN.match(stripped)
        if bullet_match:
            item = normalize_text(bullet_match.group(1)).rstrip(":.")
            if ":" in item:
                item = item.split(":", 1)[0].strip()
            item = re.sub(r"^(?:when|whenever)\s+", "", item, flags=re.IGNORECASE)
            if item:
                items.append(lower_first_fragment(item))
    return items


def build_usage_sentence(section_lines: list[str]) -> str | None:
    """Join up to three trigger phrases into a single "Use when ..." sentence."""
    items = extract_usage_items(section_lines)
    if not items:
        return None

    items = items[:3]
    if len(items) == 1:
        return f"Use when {items[0]}."
    if len(items) == 2:
        return f"Use when {items[0]} or {items[1]}."
    return f"Use when {items[0]}, {items[1]}, or {items[2]}."


def first_sentence(text: str) -> str:
    normalized = normalize_text(text)
    match = SENTENCE_PATTERN.match(normalized)
    if match:
        return match.group(1).strip()
    return normalized


def is_substantial_capability(text: str) -> bool:
    words = re.findall(r"[A-Za-z0-9]+", text)
    stripped = text.strip()
    return len(words) >= 6 and not (stripped.startswith("(") and stripped.endswith(")"))


def select_capability_sentence(description: str, body: str) -> str:
    """Prefer the description's own first sentence; fall back to the body's first substantial one."""
    description_sentence = first_sentence(description)
    if is_substantial_capability(description_sentence):
        return description_sentence

    description_key = normalize_for_match(description)
    for paragraph in split_candidate_paragraphs(body):
        if not is_usable_paragraph(paragraph):
            continue
        if normalize_for_match(paragraph) == description_key:
            continue

        candidate = first_sentence(paragraph)
        if is_substantial_capability(candidate):
            return candidate

    return description_sentence


def ensure_terminal_punctuation(text: str) -> str:
    stripped = text.rstrip()
    if not stripped:
        return stripped
    if stripped.endswith((".", "!", "?")):
        return stripped
    return f"{stripped}."


def build_repaired_description(description: str, body: str) -> str | None:
    """Build a "capability + Use when ..." description, or return None when no repair applies."""
    if has_explicit_usage_cue(description):
        return None
    if not mirrors_intro_paragraph(description, body):
        return None

    usage_sentence = build_usage_sentence(extract_when_to_use_lines(body))
    if not usage_sentence:
        return None

    capability_sentence = ensure_terminal_punctuation(select_capability_sentence(description, body))
    candidate = clamp_description(f"{capability_sentence} {usage_sentence}")
    if normalize_text(candidate) == normalize_text(description):
        return None
    return candidate


def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    """Rewrite the frontmatter description in place; return (changed, new_description)."""
    content = skill_path.read_text(encoding="utf-8")
    match = FRONTMATTER_PATTERN.search(content)
    if not match:
        return False, None

    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, None

    description = metadata.get("description")
    if not isinstance(description, str):
        return False, None

    new_description = build_repaired_description(description, strip_frontmatter(content))
    if not new_description:
        return False, None

    updated_frontmatter = replace_description(match.group(1), new_description)
    updated_content = f"---\n{updated_frontmatter}\n---{content[match.end():]}"
    if updated_content == content:
        return False, None

    skill_path.write_text(updated_content, encoding="utf-8")
    return True, new_description


def main() -> int:
    configure_utf8_output()

    parser = argparse.ArgumentParser(
        description="Repair synthetic descriptions by adding concise when-to-use guidance.",
    )
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"

    fixed = 0
    skipped = 0
    for root, dirs, files in os.walk(skills_dir):
        # Skip hidden directories (names starting with a dot).
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue

        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        if not isinstance(description, str):
            continue

        new_description = build_repaired_description(description, strip_frontmatter(content))
        if not new_description:
            continue

        if args.dry_run:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}")
            continue

        changed, _ = update_skill_file(skill_path)
        if changed:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)}")
        else:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")

    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0


if __name__ == "__main__":
    sys.exit(main())