#!/usr/bin/env python3 from __future__ import annotations import argparse import os import re import sys from pathlib import Path from _project_paths import find_repo_root from validate_skills import configure_utf8_output, parse_frontmatter ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$") MAX_DESCRIPTION_LENGTH = 300 MIN_PARAGRAPH_LENGTH = 40 TOP_LEVEL_KEY_PATTERN = re.compile(r"^[A-Za-z0-9_-]+:\s*") FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL) MARKDOWN_DECORATION_PATTERN = re.compile(r"[*_`]+") HTML_TAG_PATTERN = re.compile(r"<[^>]+>") MULTISPACE_PATTERN = re.compile(r"\s+") def strip_frontmatter(content: str) -> str: match = FRONTMATTER_PATTERN.search(content) if not match: return content return content[match.end():].lstrip() def normalize_text(text: str) -> str: text = text.strip() text = re.sub(r"^\s*>+\s?", "", text) text = MARKDOWN_DECORATION_PATTERN.sub("", text) text = HTML_TAG_PATTERN.sub("", text) text = MULTISPACE_PATTERN.sub(" ", text) return text.strip() def split_candidate_paragraphs(body: str) -> list[str]: paragraphs: list[str] = [] current: list[str] = [] in_code_block = False for raw_line in body.splitlines(): line = raw_line.rstrip() stripped = line.strip() if stripped.startswith("```"): in_code_block = not in_code_block if current: paragraphs.append(" ".join(current)) current = [] continue if in_code_block: continue if not stripped: if current: paragraphs.append(" ".join(current)) current = [] continue if stripped.startswith("#"): if current: paragraphs.append(" ".join(current)) current = [] continue if stripped.startswith(("- ", "* ", "|", "1. ", "2. ", "3. ", "4. ", "5. ")): if current: paragraphs.append(" ".join(current)) current = [] continue current.append(stripped) if current: paragraphs.append(" ".join(current)) return [normalize_text(paragraph) for paragraph in paragraphs if normalize_text(paragraph)] def is_usable_paragraph(paragraph: str) -> bool: lower = paragraph.lower() if len(paragraph) < MIN_PARAGRAPH_LENGTH: return False if lower.startswith(("role:", "works well with:", "capabilities:", "patterns:", "anti-patterns:")): return False if lower.startswith("this skill is applicable to execute the workflow"): return False return True def normalize_for_match(text: str) -> str: return re.sub(r"[^a-z0-9]+", "", text.lower()) def pick_candidate(description: str, body: str) -> str | None: paragraphs = [paragraph for paragraph in split_candidate_paragraphs(body) if is_usable_paragraph(paragraph)] if not paragraphs: return None desc_prefix = ELLIPSIS_PATTERN.sub("", description).strip() normalized_prefix = normalize_for_match(desc_prefix) if normalized_prefix: for paragraph in paragraphs: normalized_paragraph = normalize_for_match(paragraph) if normalized_paragraph.startswith(normalized_prefix) or normalized_prefix in normalized_paragraph: return paragraph return paragraphs[0] def clamp_description(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str: text = normalize_text(text) if len(text) <= max_length: return text sentence_candidates = [". ", "! ", "? "] best_split = -1 for marker in sentence_candidates: split = text.rfind(marker, 0, max_length + 1) if split > best_split: best_split = split if best_split != -1: return text[: best_split + 1].strip() split = text.rfind(" ", 0, max_length + 1) if split == -1: return text[:max_length].strip() return text[:split].strip() def escape_yaml_string(text: str) -> str: return text.replace("\\", "\\\\").replace('"', '\\"') def replace_description(frontmatter_text: str, new_description: str) -> str: lines = frontmatter_text.splitlines() replacement = f'description: "{escape_yaml_string(new_description)}"' for index, line in enumerate(lines): if not re.match(r"^\s*description:\s*", line): continue current_indent = len(line) - len(line.lstrip(" ")) end_index = index + 1 while end_index < len(lines): candidate = lines[end_index] stripped = candidate.strip() candidate_indent = len(candidate) - len(candidate.lstrip(" ")) if not stripped: end_index += 1 continue if candidate_indent <= current_indent and TOP_LEVEL_KEY_PATTERN.match(stripped): break end_index += 1 updated = lines[:index] + [replacement] + lines[end_index:] return "\n".join(updated) raise ValueError("Description field not found in frontmatter.") def update_skill_file(skill_path: Path) -> tuple[bool, str | None]: content = skill_path.read_text(encoding="utf-8") match = FRONTMATTER_PATTERN.search(content) if not match: return False, None metadata, _ = parse_frontmatter(content, skill_path.as_posix()) if not metadata: return False, None description = metadata.get("description") if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()): return False, None candidate = pick_candidate(description, strip_frontmatter(content)) if not candidate: return False, None new_description = clamp_description(candidate) if not new_description or new_description == normalize_text(description): return False, None updated_frontmatter = replace_description(match.group(1), new_description) updated_content = f"---\n{updated_frontmatter}\n---{content[match.end():]}" if updated_content == content: return False, None skill_path.write_text(updated_content, encoding="utf-8") return True, new_description def main() -> int: configure_utf8_output() parser = argparse.ArgumentParser(description="Repair truncated SKILL.md frontmatter descriptions.") parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.") args = parser.parse_args() repo_root = find_repo_root(__file__) skills_dir = repo_root / "skills" fixed = 0 skipped = 0 for root, dirs, files in os.walk(skills_dir): dirs[:] = [directory for directory in dirs if not directory.startswith(".")] if "SKILL.md" not in files: continue skill_path = Path(root) / "SKILL.md" content = skill_path.read_text(encoding="utf-8") metadata, _ = parse_frontmatter(content, skill_path.as_posix()) description = metadata.get("description") if metadata else None if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()): continue candidate = pick_candidate(description, strip_frontmatter(content)) if not candidate: skipped += 1 print(f"SKIP {skill_path.relative_to(repo_root)}") continue new_description = clamp_description(candidate) if args.dry_run: fixed += 1 print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}") continue changed, _ = update_skill_file(skill_path) if changed: fixed += 1 print(f"FIX {skill_path.relative_to(repo_root)}") else: skipped += 1 print(f"SKIP {skill_path.relative_to(repo_root)}") print(f"\nFixed: {fixed}") print(f"Skipped: {skipped}") return 0 if __name__ == "__main__": sys.exit(main())