#!/usr/bin/env python3
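"""Repair SKILL.md descriptions that merely restate the intro paragraph.

For each skills/*/SKILL.md whose frontmatter description mirrors the body's
opening paragraph and lacks an explicit usage cue, append a concise
"Use when ..." sentence built from the document's "When to use" section.
Pass --dry-run to preview the planned fixes without writing files.
"""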
from __future__ import annotations

import argparse
import os
import re
import sys
from pathlib import Path

from _project_paths import find_repo_root
from fix_truncated_descriptions import (
    FRONTMATTER_PATTERN,
    clamp_description,
    is_usable_paragraph,
    normalize_for_match,
    normalize_text,
    replace_description,
    split_candidate_paragraphs,
    strip_frontmatter,
)
from validate_skills import configure_utf8_output, parse_frontmatter


USAGE_CUE_PATTERNS = (
    "use when",
    "when to use",
    "this skill should be used when",
    "use this skill when",
    "use this when",
    "use it to",
    "use for ",
    "trigger on",
    "triggers on",
)
WHEN_TO_USE_HEADINGS = {
    "when to use",
    "when to apply",
    "when to activate",
}
BULLET_PATTERN = re.compile(r"^(?:[-*]\s+|\d+\.\s+)(.+)$")
SENTENCE_PATTERN = re.compile(r"^(.+?[.!?])(?:\s|$)")


def first_usable_paragraph(body: str) -> str | None:
    """Return the first paragraph of the body that passes is_usable_paragraph."""
    paragraphs = [
        paragraph
        for paragraph in split_candidate_paragraphs(body)
        if is_usable_paragraph(paragraph)
    ]
    return paragraphs[0] if paragraphs else None


def has_explicit_usage_cue(description: str) -> bool:
    """Check whether the description already carries a when-to-use phrase."""
    lower = description.lower()
    return any(phrase in lower for phrase in USAGE_CUE_PATTERNS)


def mirrors_intro_paragraph(description: str, body: str) -> bool:
    """Check whether the description is just a copy of the body's opening paragraph."""
    intro = first_usable_paragraph(body)
    if not intro:
        return False
    return normalize_for_match(description) == normalize_for_match(intro)


def extract_when_to_use_lines(body: str) -> list[str]:
    """Return the lines under the first "When to use"-style heading, if any."""
    lines = body.splitlines()
    capturing = False
    captured: list[str] = []

    for raw_line in lines:
        stripped = raw_line.strip()
        heading_match = re.match(r"^(#{2,6})\s+(.*)$", stripped)
        if heading_match:
            heading = normalize_text(heading_match.group(2)).lower().rstrip(":")
            if capturing:
                break
            capturing = heading in WHEN_TO_USE_HEADINGS
            continue

        if capturing:
            captured.append(raw_line)

    return captured


def lower_first_fragment(text: str) -> str:
    """Lower-case a leading capital so the fragment reads mid-sentence; leave all-caps words unchanged."""
    if not text:
        return text
    first = text[0]
    second = text[1] if len(text) > 1 else ""
    if first.isupper() and second.islower():
        return first.lower() + text[1:]
    return text


def extract_usage_items(section_lines: list[str]) -> list[str]:
    """Convert bullet or numbered lines into short trigger phrases."""
    items: list[str] = []
    for raw_line in section_lines:
        stripped = raw_line.strip()
        if not stripped:
            continue

        bullet_match = BULLET_PATTERN.match(stripped)
        if bullet_match:
            item = normalize_text(bullet_match.group(1)).rstrip(":.")
            if ":" in item:
                item = item.split(":", 1)[0].strip()
            item = re.sub(r"^(?:when|whenever)\s+", "", item, flags=re.IGNORECASE)
            if item:
                items.append(lower_first_fragment(item))
    return items


def build_usage_sentence(section_lines: list[str]) -> str | None:
    """Join up to three trigger phrases into a single "Use when ..." sentence."""
    items = extract_usage_items(section_lines)
    if not items:
        return None

    items = items[:3]
    if len(items) == 1:
        return f"Use when {items[0]}."
    if len(items) == 2:
        return f"Use when {items[0]} or {items[1]}."
    return f"Use when {items[0]}, {items[1]}, or {items[2]}."


def first_sentence(text: str) -> str:
    normalized = normalize_text(text)
    match = SENTENCE_PATTERN.match(normalized)
    if match:
        return match.group(1).strip()
    return normalized


def is_substantial_capability(text: str) -> bool:
    words = re.findall(r"[A-Za-z0-9]+", text)
    stripped = text.strip()
    return len(words) >= 6 and not (stripped.startswith("(") and stripped.endswith(")"))


def select_capability_sentence(description: str, body: str) -> str:
    """Prefer the description's own first sentence; fall back to the body's first substantial one."""
    description_sentence = first_sentence(description)
    if is_substantial_capability(description_sentence):
        return description_sentence

    description_key = normalize_for_match(description)
    for paragraph in split_candidate_paragraphs(body):
        if not is_usable_paragraph(paragraph):
            continue
        if normalize_for_match(paragraph) == description_key:
            continue

        candidate = first_sentence(paragraph)
        if is_substantial_capability(candidate):
            return candidate

    return description_sentence


def ensure_terminal_punctuation(text: str) -> str:
    stripped = text.rstrip()
    if not stripped:
        return stripped
    if stripped.endswith((".", "!", "?")):
        return stripped
    return f"{stripped}."


def build_repaired_description(description: str, body: str) -> str | None:
    """Build a "capability + Use when ..." description, or return None when no repair applies."""
    if has_explicit_usage_cue(description):
        return None
    if not mirrors_intro_paragraph(description, body):
        return None

    usage_sentence = build_usage_sentence(extract_when_to_use_lines(body))
    if not usage_sentence:
        return None

    capability_sentence = ensure_terminal_punctuation(select_capability_sentence(description, body))
    candidate = clamp_description(f"{capability_sentence} {usage_sentence}")
    if normalize_text(candidate) == normalize_text(description):
        return None
    return candidate


def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    """Rewrite the frontmatter description in place; return (changed, new_description)."""
    content = skill_path.read_text(encoding="utf-8")
    match = FRONTMATTER_PATTERN.search(content)
    if not match:
        return False, None

    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, None

    description = metadata.get("description")
    if not isinstance(description, str):
        return False, None

    new_description = build_repaired_description(description, strip_frontmatter(content))
    if not new_description:
        return False, None

    updated_frontmatter = replace_description(match.group(1), new_description)
    updated_content = f"---\n{updated_frontmatter}\n---{content[match.end():]}"
    if updated_content == content:
        return False, None

    skill_path.write_text(updated_content, encoding="utf-8")
    return True, new_description


def main() -> int:
    configure_utf8_output()

    parser = argparse.ArgumentParser(
        description="Repair synthetic descriptions by adding concise when-to-use guidance.",
    )
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"

    fixed = 0
    skipped = 0
    for root, dirs, files in os.walk(skills_dir):
        # Skip hidden directories (names starting with a dot).
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue

        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        if not isinstance(description, str):
            continue

        new_description = build_repaired_description(description, strip_frontmatter(content))
        if not new_description:
            continue

        if args.dry_run:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}")
            continue

        changed, _ = update_skill_file(skill_path)
        if changed:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)}")
        else:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")

    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0


if __name__ == "__main__":
    sys.exit(main())