Files
antigravity-skills-reference/tools/scripts/cleanup_synthetic_skill_sections.py
sickn33 fc3c7ae8a2 meta(skills): Add skill audit and safe metadata fixes
Add repo-wide auditing and targeted repair scripts for skill metadata.
Fix truncated descriptions automatically, keep heading normalization
conservative, and remove synthetic boilerplate sections that degrade
editorial quality while regenerating repo indexes and catalogs.

Fixes #365
2026-03-20 09:06:45 +01:00

140 lines
4.6 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import subprocess
import sys
from pathlib import Path
from _project_paths import find_repo_root
from fix_missing_skill_sections import (
build_examples_section,
build_when_section,
has_examples,
has_when_to_use_section,
)
from validate_skills import configure_utf8_output, parse_frontmatter
def get_head_content(repo_root: Path, relative_path: Path) -> str | None:
    """Return the content of a file as committed at ``HEAD``.

    Runs ``git show HEAD:<relative_path>`` with *repo_root* as the working
    directory.

    Args:
        repo_root: Directory in which the ``git`` command is executed.
        relative_path: Path of the file relative to the repository root; it is
            rendered with forward slashes for the ``HEAD:<path>`` object spec.

    Returns:
        The command's standard output decoded as UTF-8, or ``None`` when git
        exits with a non-zero status (for example, the path does not exist at
        ``HEAD`` or *repo_root* is not inside a repository).
    """
    result = subprocess.run(
        ["git", "show", f"HEAD:{relative_path.as_posix()}"],
        cwd=repo_root,
        capture_output=True,
        text=True,
        # Skill files are read and written as UTF-8 elsewhere in this script.
        # Without an explicit encoding, text mode decodes with the locale's
        # preferred encoding, which can mis-decode or fail on non-UTF-8 locales.
        encoding="utf-8",
        check=False,
    )
    if result.returncode != 0:
        return None
    return result.stdout
def remove_exact_section(content: str, section_text: str) -> str:
    """Remove one standalone occurrence of *section_text* from *content*.

    The stripped section text is matched literally. It is only removed when it
    is preceded by a newline and followed by a newline plus either a blank line
    and a ``#``/``##`` heading, or the end of the text. An occurrence preceded
    by a blank line is tried first; if there is none, an occurrence preceded by
    a single newline is tried.

    Regardless of whether anything was removed, runs of three or more newlines
    are collapsed to a single blank line and the result ends with exactly one
    trailing newline.

    Args:
        content: Full document text.
        section_text: Section to remove; surrounding whitespace is ignored.

    Returns:
        The normalized text with at most one occurrence of the section removed.
    """
    literal_section = re.escape(section_text.strip())
    # The section must end its line and be followed by "blank line + heading"
    # or by the end of the document.
    trailing_boundary = r"\n(?=\n##\s|\n#\s|\Z)"

    result = content
    # Prefer the occurrence that has a blank line in front of it.
    for leading_newlines in (r"\n\n", r"\n"):
        result, hits = re.subn(
            leading_newlines + literal_section + trailing_boundary,
            "\n",
            result,
            count=1,
            flags=re.DOTALL,
        )
        if hits:
            break

    collapsed = re.sub(r"\n{3,}", "\n\n", result)
    return collapsed.rstrip() + "\n"
def cleanup_skill_file(repo_root: Path, skill_path: Path) -> tuple[bool, list[str]]:
    """Strip generated "when to use" / "examples" sections from one skill file.

    A section is a removal candidate when the text produced by
    ``build_when_section`` / ``build_examples_section`` for this skill's name
    and description occurs in the current file, and the version of the file at
    ``HEAD`` has no such section according to ``has_when_to_use_section`` /
    ``has_examples``.

    A change is only recorded, and the file only rewritten, when
    ``remove_exact_section`` actually removed an occurrence. A candidate that
    is present as a substring but not removable as a standalone section leaves
    the file untouched instead of being reported as removed.

    Args:
        repo_root: Repository root; used to run git and to relativize the path.
        skill_path: Path to the skill file inside *repo_root*.

    Returns:
        ``(True, labels)`` when the file was rewritten, where *labels* names
        each removed section; ``(False, [])`` when the file has no usable
        frontmatter or string description, has no content at ``HEAD``, or
        nothing was removed.
    """
    current_content = skill_path.read_text(encoding="utf-8")
    metadata, _ = parse_frontmatter(current_content, skill_path.as_posix())
    if not metadata:
        return False, []

    description = metadata.get("description")
    if not isinstance(description, str):
        return False, []

    relative_path = skill_path.relative_to(repo_root)
    head_content = get_head_content(repo_root, relative_path)
    if head_content is None:
        return False, []

    skill_name = str(metadata.get("name") or skill_path.parent.name)
    candidates = (
        (
            "removed_synthetic_when_to_use",
            build_when_section(skill_name, description),
            has_when_to_use_section,
        ),
        (
            "removed_synthetic_examples",
            build_examples_section(skill_name, description),
            has_examples,
        ),
    )

    updated = current_content
    changes: list[str] = []
    for label, generated, present_at_head in candidates:
        if generated not in updated or present_at_head(head_content):
            continue
        stripped = generated.strip()
        if not stripped:
            continue
        candidate = remove_exact_section(updated, generated)
        # remove_exact_section always normalizes whitespace, so comparing the
        # texts is not enough: confirm that an occurrence really disappeared.
        if candidate.count(stripped) >= updated.count(stripped):
            continue
        updated = candidate
        changes.append(label)

    if not changes:
        return False, []

    skill_path.write_text(updated, encoding="utf-8")
    return True, changes
def main() -> int:
    """Command-line entry point: scan ``skills/`` and clean up each skill file.

    Walks the ``skills`` directory under the repository root, skipping
    directories whose name starts with a dot, and inspects every ``SKILL.md``.
    With ``--dry-run`` the planned removals are only printed; otherwise
    ``cleanup_skill_file`` is invoked and its reported changes are printed.
    A final line prints the number of files counted as modified.

    Returns:
        Always ``0``.
    """
    configure_utf8_output()
    cli = argparse.ArgumentParser(description="Remove synthetic generic sections previously generated from descriptions.")
    cli.add_argument("--dry-run", action="store_true", help="Preview changes without writing files.")
    options = cli.parse_args()

    repo_root = find_repo_root(__file__)
    fixed_count = 0

    for current_dir, subdirs, filenames in os.walk(repo_root / "skills"):
        # Prune in place so os.walk does not descend into hidden directories.
        subdirs[:] = [name for name in subdirs if not name.startswith(".")]
        if "SKILL.md" not in filenames:
            continue

        skill_path = Path(current_dir) / "SKILL.md"
        text = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(text, skill_path.as_posix())
        if not metadata:
            continue
        description = metadata.get("description")
        if not isinstance(description, str):
            continue

        relative_path = skill_path.relative_to(repo_root)
        head_text = get_head_content(repo_root, relative_path)
        if head_text is None:
            continue

        skill_name = str(metadata.get("name") or skill_path.parent.name)
        when_block = build_when_section(skill_name, description)
        examples_block = build_examples_section(skill_name, description)

        planned: list[str] = []
        if when_block in text and not has_when_to_use_section(head_text):
            planned.append("removed_synthetic_when_to_use")
        if examples_block in text and not has_examples(head_text):
            planned.append("removed_synthetic_examples")
        if not planned:
            continue

        if options.dry_run:
            reported = planned
        else:
            was_changed, reported = cleanup_skill_file(repo_root, skill_path)
            if not was_changed:
                continue

        fixed_count += 1
        print(f"FIX {relative_path} [{', '.join(reported)}]")

    print(f"\nModified: {fixed_count}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())