Files
antigravity-skills-reference/tools/scripts/fix_truncated_descriptions.py
sickn33 fc3c7ae8a2 meta(skills): Add skill audit and safe metadata fixes
Add repo-wide auditing and targeted repair scripts for skill metadata.
Fix truncated descriptions automatically, keep heading normalization
conservative, and remove synthetic boilerplate sections that degrade
editorial quality while regenerating repo indexes and catalogs.

Fixes #365
2026-03-20 09:06:45 +01:00

251 lines
7.8 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import sys
from pathlib import Path
from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, parse_frontmatter
# Trailing "..." or Unicode ellipsis (plus optional whitespace) at the end of a description.
ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$")
# Hard upper bound (characters) for a repaired description.
MAX_DESCRIPTION_LENGTH = 300
# Paragraphs shorter than this are rejected as description candidates.
MIN_PARAGRAPH_LENGTH = 40
# A YAML mapping key at the start of a (stripped) line, e.g. "name: ".
TOP_LEVEL_KEY_PATTERN = re.compile(r"^[A-Za-z0-9_-]+:\s*")
# Leading YAML frontmatter delimited by '---' lines; group 1 is the frontmatter body.
FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
# Markdown emphasis/code decoration characters to strip.
MARKDOWN_DECORATION_PATTERN = re.compile(r"[*_`]+")
# Inline HTML tags such as <br> or <em>.
HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
# Any run of whitespace, to be collapsed to a single space.
MULTISPACE_PATTERN = re.compile(r"\s+")
def strip_frontmatter(content: str) -> str:
    """Return *content* with any leading YAML frontmatter block removed."""
    found = FRONTMATTER_PATTERN.search(content)
    if found is None:
        return content
    # Drop everything through the closing '---', plus whitespace that follows it.
    return content[found.end():].lstrip()
def normalize_text(text: str) -> str:
    """Flatten a markdown fragment into one plain-text line.

    Strips a leading blockquote marker, markdown decoration, and inline HTML
    tags, then collapses whitespace runs to single spaces.
    """
    cleaned = text.strip()
    cleaned = re.sub(r"^\s*>+\s?", "", cleaned)
    for pattern in (MARKDOWN_DECORATION_PATTERN, HTML_TAG_PATTERN):
        cleaned = pattern.sub("", cleaned)
    cleaned = MULTISPACE_PATTERN.sub(" ", cleaned)
    return cleaned.strip()
def split_candidate_paragraphs(body: str) -> list[str]:
    """Split markdown *body* into normalized prose paragraphs.

    Fenced code blocks are skipped entirely; headings, list items, table
    rows, and blank lines terminate the current paragraph and are excluded.
    Returns only non-empty paragraphs after normalize_text().
    """
    # Fix: the original only recognized ordered-list markers "1. " through
    # "5. ", so items numbered 6+ (or multi-digit) leaked into paragraphs.
    ordered_item = re.compile(r"\d+\.\s")
    paragraphs: list[str] = []
    current: list[str] = []
    in_code_block = False

    def flush() -> None:
        # Close the paragraph under construction, if any.
        if current:
            paragraphs.append(" ".join(current))
            current.clear()

    for raw_line in body.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("```"):
            # A fence both toggles code mode and terminates the paragraph.
            in_code_block = not in_code_block
            flush()
            continue
        if in_code_block:
            continue
        is_structural = (
            not stripped
            or stripped.startswith("#")
            or stripped.startswith(("- ", "* ", "|"))
            or ordered_item.match(stripped) is not None
        )
        if is_structural:
            flush()
            continue
        current.append(stripped)
    flush()
    return [normalize_text(p) for p in paragraphs if normalize_text(p)]
def is_usable_paragraph(paragraph: str) -> bool:
    """Return True when *paragraph* is substantive prose usable as a description."""
    if len(paragraph) < MIN_PARAGRAPH_LENGTH:
        return False
    # Reject structural/metadata lead-ins and a known boilerplate opener.
    rejected_prefixes = (
        "role:",
        "works well with:",
        "capabilities:",
        "patterns:",
        "anti-patterns:",
        "this skill is applicable to execute the workflow",
    )
    return not paragraph.lower().startswith(rejected_prefixes)
def normalize_for_match(text: str) -> str:
    """Lowercase *text* and strip all non-[a-z0-9] characters for fuzzy prefix matching."""
    lowered = text.lower()
    return re.sub(r"[^a-z0-9]", "", lowered)
def pick_candidate(description: str, body: str) -> str | None:
    """Choose the body paragraph that best replaces a truncated *description*.

    Prefers a paragraph that starts with or contains the truncated prefix;
    otherwise falls back to the first usable paragraph. Returns None when
    the body contains no usable prose.
    """
    usable = [p for p in split_candidate_paragraphs(body) if is_usable_paragraph(p)]
    if not usable:
        return None
    prefix = normalize_for_match(ELLIPSIS_PATTERN.sub("", description).strip())
    if prefix:
        for candidate in usable:
            haystack = normalize_for_match(candidate)
            if haystack.startswith(prefix) or prefix in haystack:
                return candidate
    return usable[0]
def clamp_description(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str:
    """Normalize *text* and trim it to *max_length* characters.

    Prefers cutting at the last full sentence that fits, then the last word
    boundary, and hard-truncates only as a last resort.
    """
    text = normalize_text(text)
    if len(text) <= max_length:
        return text
    sentence_end = max(text.rfind(marker, 0, max_length + 1) for marker in (". ", "! ", "? "))
    if sentence_end != -1:
        # Keep the sentence-terminating punctuation itself.
        return text[:sentence_end + 1].strip()
    word_end = text.rfind(" ", 0, max_length + 1)
    if word_end == -1:
        return text[:max_length].strip()
    return text[:word_end].strip()
def escape_yaml_string(text: str) -> str:
    """Escape backslashes and double quotes so *text* fits in a double-quoted YAML scalar."""
    escaped: list[str] = []
    for ch in text:
        if ch in ('\\', '"'):
            escaped.append("\\")
        escaped.append(ch)
    return "".join(escaped)
def replace_description(frontmatter_text: str, new_description: str) -> str:
    """Swap the YAML ``description`` entry (and its continuation lines) for a quoted one-liner.

    Raises:
        ValueError: when *frontmatter_text* contains no description field.
    """
    lines = frontmatter_text.splitlines()
    replacement = f'description: "{escape_yaml_string(new_description)}"'
    for index, line in enumerate(lines):
        if re.match(r"^\s*description:\s*", line) is None:
            continue
        base_indent = len(line) - len(line.lstrip(" "))
        # Advance past continuation lines until the next sibling key appears
        # at the same (or lesser) indentation.
        end = index + 1
        while end < len(lines):
            follower = lines[end]
            body = follower.strip()
            if not body:
                end += 1
                continue
            follower_indent = len(follower) - len(follower.lstrip(" "))
            if follower_indent <= base_indent and TOP_LEVEL_KEY_PATTERN.match(body):
                break
            end += 1
        return "\n".join(lines[:index] + [replacement] + lines[end:])
    raise ValueError("Description field not found in frontmatter.")
def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    """Repair a truncated frontmatter description in a single SKILL.md.

    Returns ``(True, new_description)`` when the file was rewritten and
    ``(False, None)`` when no change was needed or possible.
    """
    content = skill_path.read_text(encoding="utf-8")
    match = FRONTMATTER_PATTERN.search(content)
    if match is None:
        return False, None
    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, None
    description = metadata.get("description")
    # Only act on string descriptions that visibly end in an ellipsis.
    if not isinstance(description, str):
        return False, None
    if not ELLIPSIS_PATTERN.search(description.strip()):
        return False, None
    candidate = pick_candidate(description, strip_frontmatter(content))
    if not candidate:
        return False, None
    new_description = clamp_description(candidate)
    # Skip no-op repairs (empty result or same text as the current description).
    if not new_description or new_description == normalize_text(description):
        return False, None
    rebuilt_frontmatter = replace_description(match.group(1), new_description)
    rebuilt = f"---\n{rebuilt_frontmatter}\n---{content[match.end():]}"
    if rebuilt == content:
        return False, None
    skill_path.write_text(rebuilt, encoding="utf-8")
    return True, new_description
def main() -> int:
    """CLI entry point: scan skills/ for SKILL.md files with truncated descriptions.

    With --dry-run, reports the fixes that would be made without writing.
    Returns 0 always; per-file results are printed as FIX/SKIP lines.
    """
    configure_utf8_output()
    parser = argparse.ArgumentParser(description="Repair truncated SKILL.md frontmatter descriptions.")
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()
    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"
    fixed = 0
    skipped = 0
    for root, dirs, files in os.walk(skills_dir):
        # Prune hidden directories (e.g. .git) from the walk in place.
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue
        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
            continue
        candidate = pick_candidate(description, strip_frontmatter(content))
        if not candidate:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")
            continue
        new_description = clamp_description(candidate)
        # Fix: mirror update_skill_file's no-op check here so that --dry-run
        # reports SKIP for files a real run would leave unchanged (previously
        # dry runs counted them as FIX).
        if not new_description or new_description == normalize_text(description):
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")
            continue
        if args.dry_run:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}")
            continue
        changed, _ = update_skill_file(skill_path)
        if changed:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)}")
        else:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")
    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0
if __name__ == "__main__":
    # Script entry point: process exit status is main()'s return code.
    sys.exit(main())