Guard metadata repair and doc sync scripts against symlink targets so repo maintenance tasks cannot overwrite arbitrary local files. Replace recursive skill discovery with an iterative walk that skips symlinked directories, and harden the VideoDB listener to write only private regular files in the user-owned state directory. Also fix the broken pr:preflight script entry and make the last30days skill stop embedding raw user arguments directly in the shell command.
258 lines
8.1 KiB
Python
258 lines
8.1 KiB
Python
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from _safe_files import is_safe_regular_file
|
|
from _project_paths import find_repo_root
|
|
from validate_skills import configure_utf8_output, parse_frontmatter
|
|
|
|
|
|
# Matches a trailing ellipsis ("..." or the single "…" character), optionally
# followed by whitespace, at the very end of a string.
ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$")
# Default upper bound (in characters) used by clamp_description().
MAX_DESCRIPTION_LENGTH = 300
# Paragraphs shorter than this are rejected by is_usable_paragraph().
MIN_PARAGRAPH_LENGTH = 40
# Matches a "key:" token (letters, digits, "_" or "-") at the start of a string;
# replace_description() applies it to stripped lines to find the next key.
TOP_LEVEL_KEY_PATTERN = re.compile(r"^[A-Za-z0-9_-]+:\s*")
# Matches a "---" fenced block anchored at the start of the text; group 1 is the
# text between the opening fence and the first following "\n---".
FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
# Runs of "*", "_" and "`" characters, removed by normalize_text().
MARKDOWN_DECORATION_PATTERN = re.compile(r"[*_`]+")
# Anything of the form "<...>", removed by normalize_text().
HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
# Runs of whitespace, collapsed to a single space by normalize_text().
MULTISPACE_PATTERN = re.compile(r"\s+")
|
|
|
|
|
|
def strip_frontmatter(content: str) -> str:
    """Return *content* without its leading ``---`` frontmatter block.

    If ``FRONTMATTER_PATTERN`` does not match, the text is returned unchanged.
    Otherwise everything after the closing fence is returned with leading
    whitespace removed.
    """
    found = FRONTMATTER_PATTERN.search(content)
    if found is None:
        return content
    body_start = found.end()
    return content[body_start:].lstrip()
|
|
|
|
|
|
def normalize_text(text: str) -> str:
    """Flatten a markdown fragment into plain single-spaced text.

    Steps, in order: trim the text, drop a blockquote marker (``>``) at the
    very start, remove runs of ``*``/``_``/backtick characters, remove
    ``<...>`` tags, collapse whitespace runs to one space, and trim again.
    """
    # Only a marker at the start of the whole string is removed (no MULTILINE).
    unquoted = re.sub(r"^\s*>+\s?", "", text.strip())
    undecorated = MARKDOWN_DECORATION_PATTERN.sub("", unquoted)
    without_tags = HTML_TAG_PATTERN.sub("", undecorated)
    single_spaced = MULTISPACE_PATTERN.sub(" ", without_tags)
    return single_spaced.strip()
|
|
|
|
|
|
def split_candidate_paragraphs(body: str) -> list[str]:
    """Split a markdown *body* into normalized prose paragraphs.

    Consecutive plain-text lines are joined with single spaces into one
    paragraph. A paragraph ends at a blank line, a heading (``#``), a bullet
    (``- `` / ``* ``), a table row (``|``), an ordered-list item (any number
    followed by ``". "``), or a code fence. Everything inside a fenced code
    block is ignored, and those boundary lines themselves are never part of a
    paragraph.

    Returns:
        The paragraphs after ``normalize_text``, omitting any that normalize
        to an empty string.
    """
    paragraphs: list[str] = []
    current: list[str] = []
    in_code_block = False

    def flush() -> None:
        """Close the paragraph being accumulated, if there is one."""
        if current:
            paragraphs.append(" ".join(current))
            current.clear()

    for raw_line in body.splitlines():
        stripped = raw_line.strip()

        if stripped.startswith("```"):
            # A fence both toggles code-block state and ends the paragraph.
            in_code_block = not in_code_block
            flush()
            continue

        if in_code_block:
            continue

        is_boundary = (
            not stripped
            or stripped.startswith(("#", "- ", "* ", "|"))
            # Any ordered-list number counts, not just 1 through 5, so long
            # lists cannot leak items into the candidate paragraphs.
            or re.match(r"\d+\. ", stripped) is not None
        )
        if is_boundary:
            flush()
            continue

        current.append(stripped)

    flush()

    # Normalize each paragraph once and drop the ones that end up empty.
    normalized = (normalize_text(paragraph) for paragraph in paragraphs)
    return [paragraph for paragraph in normalized if paragraph]
|
|
|
|
|
|
def is_usable_paragraph(paragraph: str) -> bool:
    """Decide whether *paragraph* may serve as a replacement description.

    A paragraph is rejected when it is shorter than ``MIN_PARAGRAPH_LENGTH``
    or when, compared case-insensitively, it starts with one of the known
    boilerplate prefixes listed below.
    """
    if len(paragraph) < MIN_PARAGRAPH_LENGTH:
        return False

    rejected_prefixes = (
        "role:",
        "works well with:",
        "capabilities:",
        "patterns:",
        "anti-patterns:",
        "this skill is applicable to execute the workflow",
    )
    return not paragraph.lower().startswith(rejected_prefixes)
|
|
|
|
|
|
def normalize_for_match(text: str) -> str:
    """Reduce *text* to its lowercase ASCII letters and digits only.

    Every other character (spaces, punctuation, non-ASCII letters) is
    dropped, so two strings can be compared loosely.
    """
    kept_runs = re.findall(r"[a-z0-9]+", text.lower())
    return "".join(kept_runs)
|
|
|
|
|
|
def pick_candidate(description: str, body: str) -> str | None:
    """Choose the body paragraph that best continues a truncated description.

    The trailing ellipsis is removed from *description* and the remainder is
    reduced with ``normalize_for_match``. The first usable paragraph whose
    reduced form contains that text is returned. If none matches, or the
    reduced description is empty, the first usable paragraph is returned.

    Returns:
        The chosen paragraph, or ``None`` when *body* has no usable paragraph.
    """
    usable = [
        paragraph
        for paragraph in split_candidate_paragraphs(body)
        if is_usable_paragraph(paragraph)
    ]
    if not usable:
        return None

    truncated_text = ELLIPSIS_PATTERN.sub("", description).strip()
    needle = normalize_for_match(truncated_text)
    if needle:
        # Containment also covers the "starts with" case.
        for paragraph in usable:
            if needle in normalize_for_match(paragraph):
                return paragraph

    return usable[0]
|
|
|
|
|
|
def clamp_description(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str:
    """Normalize *text* and shorten it to at most *max_length* characters.

    Text that already fits is returned as normalized. Otherwise the cut is
    made, in order of preference: after the last sentence terminator
    (``". "``, ``"! "`` or ``"? "``) found within the limit, at the last
    space within the limit, or as a hard cut at *max_length*.
    """
    cleaned = normalize_text(text)
    if len(cleaned) <= max_length:
        return cleaned

    window_end = max_length + 1

    # rfind returns -1 on a miss, so the maximum is -1 only if no marker matched.
    sentence_end = max(
        cleaned.rfind(marker, 0, window_end) for marker in (". ", "! ", "? ")
    )
    if sentence_end != -1:
        # Keep the punctuation character, drop the space after it.
        return cleaned[: sentence_end + 1].strip()

    word_break = cleaned.rfind(" ", 0, window_end)
    cut = max_length if word_break == -1 else word_break
    return cleaned[:cut].strip()
|
|
|
|
|
|
def escape_yaml_string(text: str) -> str:
    """Escape *text* for use inside a double-quoted YAML scalar.

    Backslashes are doubled and double quotes are prefixed with a backslash;
    all other characters pass through unchanged.
    """
    escapes = {ord("\\"): "\\\\", ord('"'): '\\"'}
    return text.translate(escapes)
|
|
|
|
|
|
def replace_description(frontmatter_text: str, new_description: str) -> str:
    """Return *frontmatter_text* with its ``description`` entry rewritten.

    The first line that starts (after optional whitespace) with
    ``description:`` is replaced by a single double-quoted
    ``description: "..."`` line. Every following line is treated as part of
    the old value, and dropped, until a non-blank line that looks like a key
    and is indented no deeper than the ``description`` line.

    Raises:
        ValueError: If no ``description:`` line exists.
    """
    new_line = f'description: "{escape_yaml_string(new_description)}"'
    rows = frontmatter_text.splitlines()

    start = next(
        (
            position
            for position, row in enumerate(rows)
            if re.match(r"^\s*description:\s*", row)
        ),
        None,
    )
    if start is None:
        raise ValueError("Description field not found in frontmatter.")

    key_indent = len(rows[start]) - len(rows[start].lstrip(" "))

    # Advance past continuation lines (and blank lines) of the old value.
    stop = start + 1
    for follower in rows[start + 1:]:
        text = follower.strip()
        indent = len(follower) - len(follower.lstrip(" "))
        if text and indent <= key_indent and TOP_LEVEL_KEY_PATTERN.match(text):
            break
        stop += 1

    return "\n".join([*rows[:start], new_line, *rows[stop:]])
|
|
|
|
|
|
def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    """Rewrite a truncated ``description`` in the SKILL.md at *skill_path*.

    The file is left untouched when it fails ``is_safe_regular_file``
    (presumably a symlink or non-regular file; confirm against
    ``_safe_files``), has no frontmatter or no parsed metadata, has a
    description that is not a string ending in an ellipsis, has no usable
    body paragraph, or would end up with unchanged content.

    Returns:
        ``(True, new_description)`` if the file was rewritten, otherwise
        ``(False, None)``.
    """
    unchanged: tuple[bool, str | None] = (False, None)

    if not is_safe_regular_file(skill_path):
        return unchanged

    original = skill_path.read_text(encoding="utf-8")
    fence = FRONTMATTER_PATTERN.search(original)
    if fence is None:
        return unchanged

    metadata, _ = parse_frontmatter(original, skill_path.as_posix())
    if not metadata:
        return unchanged

    description = metadata.get("description")
    if not isinstance(description, str):
        return unchanged
    if not ELLIPSIS_PATTERN.search(description.strip()):
        return unchanged

    candidate = pick_candidate(description, strip_frontmatter(original))
    if not candidate:
        return unchanged

    new_description = clamp_description(candidate)
    if not new_description:
        return unchanged
    if new_description == normalize_text(description):
        return unchanged

    # Rebuild the file: fresh fences around the patched frontmatter, followed
    # by whatever came after the original closing fence.
    patched_frontmatter = replace_description(fence.group(1), new_description)
    remainder = original[fence.end():]
    rewritten = f"---\n{patched_frontmatter}\n---{remainder}"
    if rewritten == original:
        return unchanged

    skill_path.write_text(rewritten, encoding="utf-8")
    return True, new_description
|
|
|
|
|
|
def main() -> int:
    """Scan ``skills/`` for truncated SKILL.md descriptions and repair them.

    Walks ``<repo_root>/skills`` without descending into dot-directories.
    For each ``SKILL.md`` whose frontmatter ``description`` ends in an
    ellipsis, a replacement is derived from the body. With ``--dry-run`` the
    planned change is only printed; otherwise the file is rewritten through
    ``update_skill_file``. One ``FIX``/``SKIP`` line is printed per handled
    file, followed by the totals.

    Returns:
        Always ``0``.
    """
    configure_utf8_output()

    parser = argparse.ArgumentParser(description="Repair truncated SKILL.md frontmatter descriptions.")
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"

    fixed = 0
    skipped = 0
    for root, dirs, files in os.walk(skills_dir):
        # Prune hidden directories in place so os.walk does not enter them.
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue

        skill_path = Path(root) / "SKILL.md"
        relative_path = skill_path.relative_to(repo_root)

        if not is_safe_regular_file(skill_path):
            # Count this skip so the summary matches the printed SKIP lines.
            skipped += 1
            print(f"SKIP {relative_path} [symlinked_or_unreadable]")
            continue

        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
            # Not truncated: nothing to report for this file.
            continue

        candidate = pick_candidate(description, strip_frontmatter(content))
        if not candidate:
            skipped += 1
            print(f"SKIP {relative_path}")
            continue

        new_description = clamp_description(candidate)
        if args.dry_run:
            # Apply the same "no real change" test as update_skill_file, so a
            # dry run does not announce fixes a real run would skip.
            if not new_description or new_description == normalize_text(description):
                skipped += 1
                print(f"SKIP {relative_path}")
            else:
                fixed += 1
                print(f"FIX {relative_path} -> {new_description}")
            continue

        changed, _ = update_skill_file(skill_path)
        if changed:
            fixed += 1
            print(f"FIX {relative_path}")
        else:
            skipped += 1
            print(f"SKIP {relative_path}")

    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0
|
|
|
|
|
|
# Run the repair only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|