Files
antigravity-skills-reference/tools/scripts/fix_truncated_descriptions.py
sickn33 3efff111d2 fix(security): Harden skill tooling file handling
Guard metadata repair and doc sync scripts against symlink targets so
repo maintenance tasks cannot overwrite arbitrary local files.

Replace recursive skill discovery with an iterative walk that skips
symlinked directories, and harden the VideoDB listener to write only
private regular files in the user-owned state directory.

Also fix the broken pr:preflight script entry and make the last30days
skill stop embedding raw user arguments directly in the shell command.
2026-03-21 11:50:16 +01:00

258 lines
8.1 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import sys
from pathlib import Path
from _safe_files import is_safe_regular_file
from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, parse_frontmatter
# Matches a trailing ellipsis (three ASCII dots or the single "…" character),
# optionally followed by whitespace, at the very end of a string.
ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$")
# Default upper bound, in characters, used by clamp_description().
MAX_DESCRIPTION_LENGTH = 300
# Paragraphs shorter than this are rejected by is_usable_paragraph().
MIN_PARAGRAPH_LENGTH = 40
# Matches a "key:" prefix built from ASCII letters, digits, "_" or "-".
# replace_description() uses it to detect where the next key begins.
TOP_LEVEL_KEY_PATTERN = re.compile(r"^[A-Za-z0-9_-]+:\s*")
# Matches a "---" delimited frontmatter block at the start of the text
# ("^" is not in MULTILINE mode); group 1 is the text between the delimiters,
# and DOTALL lets that text span several lines.
FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
# Runs of "*", "_" or "`" characters; normalize_text() deletes them.
MARKDOWN_DECORATION_PATTERN = re.compile(r"[*_`]+")
# Anything shaped like "<...>"; normalize_text() deletes it.
HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
# Any run of whitespace; normalize_text() collapses it to a single space.
MULTISPACE_PATTERN = re.compile(r"\s+")
def strip_frontmatter(content: str) -> str:
    """Return *content* with its leading frontmatter block removed.

    If ``FRONTMATTER_PATTERN`` does not match, the input is returned
    unchanged. Otherwise everything after the closing ``---`` is returned
    with leading whitespace stripped.
    """
    found = FRONTMATTER_PATTERN.search(content)
    if found is None:
        return content
    body = content[found.end():]
    return body.lstrip()
def normalize_text(text: str) -> str:
    """Reduce a Markdown fragment to plain single-spaced text.

    Steps, in order: trim the input, drop one leading blockquote marker
    (``>``), delete Markdown emphasis/code characters, delete HTML-like tags,
    collapse every whitespace run to one space, and trim again.
    """
    cleaned = re.sub(r"^\s*>+\s?", "", text.strip())
    # The substitutions are order-sensitive, so they are applied as a fixed
    # sequence rather than in arbitrary order.
    substitutions = (
        (MARKDOWN_DECORATION_PATTERN, ""),
        (HTML_TAG_PATTERN, ""),
        (MULTISPACE_PATTERN, " "),
    )
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned.strip()
# Ordered-list marker with any item number ("1. ", "6. ", "12. ", ...).
_ORDERED_LIST_ITEM_PATTERN = re.compile(r"\d+\. ")


def split_candidate_paragraphs(body: str) -> list[str]:
    """Split a Markdown body into normalized plain-text paragraphs.

    Consecutive prose lines are joined with single spaces into one paragraph.
    A paragraph ends at a blank line, a code-fence line (three backticks), a
    line starting with ``#``, a bullet (``- `` / ``* ``), a table row (``|``)
    or an ordered-list item. Those boundary lines, and everything inside a
    fenced code block, are discarded.

    Ordered-list items are recognised for any item number, not just 1-5, so
    long lists cannot leak their later items into a candidate paragraph.

    Returns:
        The paragraphs after ``normalize_text``, in document order, with
        paragraphs that normalize to an empty string dropped.
    """
    paragraphs: list[str] = []
    current: list[str] = []
    in_code_block = False

    def flush() -> None:
        # Close the paragraph being accumulated, if there is one.
        if current:
            paragraphs.append(" ".join(current))
            current.clear()

    for raw_line in body.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("```"):
            # A fence toggles code mode and always ends the open paragraph.
            in_code_block = not in_code_block
            flush()
            continue
        if in_code_block:
            continue
        is_boundary = (
            not stripped
            or stripped.startswith("#")
            or stripped.startswith(("- ", "* ", "|"))
            or _ORDERED_LIST_ITEM_PATTERN.match(stripped) is not None
        )
        if is_boundary:
            flush()
            continue
        current.append(stripped)
    flush()

    # Normalize each paragraph once and keep only the non-empty results.
    normalized = (normalize_text(paragraph) for paragraph in paragraphs)
    return [paragraph for paragraph in normalized if paragraph]
def is_usable_paragraph(paragraph: str) -> bool:
    """Tell whether *paragraph* may serve as a replacement description.

    A paragraph is rejected when it is shorter than ``MIN_PARAGRAPH_LENGTH``
    characters or when, compared case-insensitively, it starts with one of
    the boilerplate prefixes listed below.
    """
    boilerplate_prefixes = (
        "role:",
        "works well with:",
        "capabilities:",
        "patterns:",
        "anti-patterns:",
        "this skill is applicable to execute the workflow",
    )
    long_enough = len(paragraph) >= MIN_PARAGRAPH_LENGTH
    return long_enough and not paragraph.lower().startswith(boilerplate_prefixes)
def normalize_for_match(text: str) -> str:
    """Return *text* lowercased with everything but ``a-z`` and ``0-9`` removed.

    The result is a compact key for loose, punctuation-insensitive
    comparison of two pieces of text.
    """
    lowered = text.lower()
    # Splitting on the unwanted runs and re-joining the pieces drops them.
    kept_pieces = re.split(r"[^a-z0-9]+", lowered)
    return "".join(kept_pieces)
def pick_candidate(description: str, body: str) -> str | None:
    """Choose the body paragraph that best continues a truncated description.

    The trailing ellipsis is removed from *description* and the remainder is
    reduced with ``normalize_for_match``. The first usable paragraph whose
    reduced form contains that key is returned; if none does, or the key is
    empty, the first usable paragraph is returned.

    Returns:
        The chosen paragraph, or ``None`` when *body* has no usable paragraph.
    """
    usable = list(filter(is_usable_paragraph, split_candidate_paragraphs(body)))
    if not usable:
        return None
    prefix_key = normalize_for_match(ELLIPSIS_PATTERN.sub("", description).strip())
    if not prefix_key:
        return usable[0]
    # A substring test also covers the "starts with" case.
    matching = (paragraph for paragraph in usable if prefix_key in normalize_for_match(paragraph))
    return next(matching, usable[0])
def clamp_description(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str:
    """Normalize *text* and shorten it to at most *max_length* characters.

    Text that already fits is returned as normalized. Otherwise the cut is
    made, in order of preference, after the last sentence terminator
    (``". "``, ``"! "`` or ``"? "``) inside the limit, at the last space
    inside the limit, or as a hard cut at *max_length* when there is no space.
    """
    cleaned = normalize_text(text)
    if len(cleaned) <= max_length:
        return cleaned
    # Searching up to max_length + 1 lets a terminator's trailing space sit
    # exactly on the limit.
    window_end = max_length + 1
    sentence_end = max(cleaned.rfind(marker, 0, window_end) for marker in (". ", "! ", "? "))
    if sentence_end != -1:
        # Keep the punctuation mark itself and drop the space after it.
        return cleaned[: sentence_end + 1].strip()
    word_end = cleaned.rfind(" ", 0, window_end)
    cut = max_length if word_end == -1 else word_end
    return cleaned[:cut].strip()
def escape_yaml_string(text: str) -> str:
    """Escape *text* for use inside a YAML double-quoted scalar.

    Backslashes and double quotes are backslash-escaped. Newlines, carriage
    returns and tabs are written as ``\\n``, ``\\r`` and ``\\t`` so the
    scalar always stays on a single physical line.

    Returns:
        The escaped text, without the surrounding quotes.
    """
    # Backslashes go first so the escapes added below are not doubled.
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    for raw, sequence in (("\n", "\\n"), ("\r", "\\r"), ("\t", "\\t")):
        escaped = escaped.replace(raw, sequence)
    return escaped
def replace_description(frontmatter_text: str, new_description: str) -> str:
    """Return *frontmatter_text* with its ``description`` entry replaced.

    The entry is rewritten as one line, ``description: "<escaped value>"``.
    The old entry is taken to run from its ``description:`` line up to, but
    not including, the next non-blank line that looks like a key and is
    indented no deeper than the ``description:`` line. Multi-line values are
    therefore removed together with the key line.

    An unindented ``description:`` key is preferred, so a nested key of the
    same name that appears earlier is left alone. Only when no unindented key
    exists is the first indented one replaced.

    Raises:
        ValueError: if no ``description:`` line is present.
    """
    lines = frontmatter_text.splitlines()
    replacement = f'description: "{escape_yaml_string(new_description)}"'

    key_indexes = [
        position
        for position, line in enumerate(lines)
        if re.match(r"^\s*description:\s*", line)
    ]
    if not key_indexes:
        raise ValueError("Description field not found in frontmatter.")
    top_level_indexes = (position for position in key_indexes if not lines[position][:1].isspace())
    index = next(top_level_indexes, key_indexes[0])

    key_line = lines[index]
    current_indent = len(key_line) - len(key_line.lstrip(" "))
    end_index = index + 1
    while end_index < len(lines):
        candidate = lines[end_index]
        stripped = candidate.strip()
        candidate_indent = len(candidate) - len(candidate.lstrip(" "))
        # Blank lines never end the entry; only a sibling/parent key does.
        if stripped and candidate_indent <= current_indent and TOP_LEVEL_KEY_PATTERN.match(stripped):
            break
        end_index += 1
    return "\n".join(lines[:index] + [replacement] + lines[end_index:])
def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    """Repair a truncated ``description`` in one skill file, in place.

    The file is rewritten only when all of the following hold: it passes
    ``is_safe_regular_file``, it has frontmatter, the parsed ``description``
    is a string ending in an ellipsis, a usable body paragraph exists, and
    the clamped replacement is non-empty and differs from the current
    description.

    Returns:
        ``(True, new_description)`` when the file was rewritten, otherwise
        ``(False, None)``. A description that the parser reports but the
        line-based replacement cannot locate is also reported as
        ``(False, None)`` rather than raising.
    """
    # NOTE(review): is_safe_regular_file is a project helper; presumably it
    # rejects symlinks and non-regular files - confirm in _safe_files.
    if not is_safe_regular_file(skill_path):
        return False, None
    content = skill_path.read_text(encoding="utf-8")
    match = FRONTMATTER_PATTERN.search(content)
    if not match:
        return False, None
    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, None
    description = metadata.get("description")
    if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
        return False, None
    candidate = pick_candidate(description, strip_frontmatter(content))
    if not candidate:
        return False, None
    new_description = clamp_description(candidate)
    if not new_description or new_description == normalize_text(description):
        return False, None
    try:
        updated_frontmatter = replace_description(match.group(1), new_description)
    except ValueError:
        # The parser found a description that the line scan cannot see;
        # leave the file untouched instead of aborting the caller.
        return False, None
    # Rebuild the file around the new frontmatter; the text after the closing
    # delimiter is carried over unchanged.
    updated_content = f"---\n{updated_frontmatter}\n---{content[match.end():]}"
    if updated_content == content:
        return False, None
    skill_path.write_text(updated_content, encoding="utf-8")
    return True, new_description
def main() -> int:
    """Scan ``skills/`` for truncated descriptions and repair them.

    Every ``SKILL.md`` under ``<repo root>/skills`` is inspected, skipping
    directories whose name starts with a dot. Files whose ``description``
    ends in an ellipsis are rewritten through ``update_skill_file`` or, with
    ``--dry-run``, only reported. Dry-run applies the same "nothing to change"
    check as the real run, so both modes report the same FIX/SKIP outcome for
    those files.

    Files that fail ``is_safe_regular_file`` are reported but not counted in
    the final "Skipped" total.

    Returns:
        Always ``0``, used as the process exit status.
    """
    configure_utf8_output()
    parser = argparse.ArgumentParser(description="Repair truncated SKILL.md frontmatter descriptions.")
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"
    fixed = 0
    skipped = 0

    for root, dirs, files in os.walk(skills_dir):
        # Prune hidden directories in place so os.walk does not descend into
        # them; sorting makes the traversal and report order reproducible.
        dirs[:] = sorted(directory for directory in dirs if not directory.startswith("."))
        if "SKILL.md" not in files:
            continue
        skill_path = Path(root) / "SKILL.md"
        relative_path = skill_path.relative_to(repo_root)
        if not is_safe_regular_file(skill_path):
            print(f"SKIP {relative_path} [symlinked_or_unreadable]")
            continue

        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
            continue

        candidate = pick_candidate(description, strip_frontmatter(content))
        if not candidate:
            skipped += 1
            print(f"SKIP {relative_path}")
            continue

        if args.dry_run:
            new_description = clamp_description(candidate)
            # Same no-op test as update_skill_file, so a dry run never
            # announces a fix that the real run would skip.
            if not new_description or new_description == normalize_text(description):
                skipped += 1
                print(f"SKIP {relative_path}")
                continue
            fixed += 1
            print(f"FIX {relative_path} -> {new_description}")
            continue

        changed, _ = update_skill_file(skill_path)
        if changed:
            fixed += 1
            print(f"FIX {relative_path}")
        else:
            skipped += 1
            print(f"SKIP {relative_path}")

    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0
# Run the command-line entry point only when executed as a script, and use
# its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())