meta(skills): Add skill audit and safe metadata fixes

Add repo-wide auditing and targeted repair scripts for skill metadata.
Fix truncated descriptions automatically, keep heading normalization
conservative, and remove synthetic boilerplate sections that degrade
editorial quality while regenerating repo indexes and catalogs.

Fixes #365
This commit is contained in:
sickn33
2026-03-20 09:05:02 +01:00
parent 93f4448c6a
commit fc3c7ae8a2
544 changed files with 6128 additions and 5290 deletions

View File

@@ -0,0 +1,389 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import os
import re
import sys
from collections import Counter
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, has_when_to_use_section, parse_frontmatter
ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$")
FENCED_CODE_BLOCK_PATTERN = re.compile(r"^```", re.MULTILINE)
EXAMPLES_HEADING_PATTERNS = [
re.compile(r"^##\s+Example(s)?\b", re.MULTILINE | re.IGNORECASE),
re.compile(r"^##\s+Usage\b", re.MULTILINE | re.IGNORECASE),
]
LIMITATIONS_HEADING_PATTERNS = [
re.compile(r"^##\s+Limitations?\b", re.MULTILINE | re.IGNORECASE),
re.compile(r"^##\s+Known\s+Limitations?\b", re.MULTILINE | re.IGNORECASE),
re.compile(r"^##\s+Constraints?\b", re.MULTILINE | re.IGNORECASE),
re.compile(r"^##\s+Out\s+of\s+Scope\b", re.MULTILINE | re.IGNORECASE),
re.compile(r"^##\s+What\s+(This\s+Skill\s+)?Does(?:\s+Not|n't)\s+Do\b", re.MULTILINE | re.IGNORECASE),
]
MARKDOWN_LINK_PATTERN = re.compile(r"\[[^\]]*\]\(([^)]+)\)")
DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}$")
SECURITY_DISCLAIMER_PATTERN = re.compile(r"AUTHORIZED USE ONLY", re.IGNORECASE)
VALID_RISK_LEVELS = {"none", "safe", "critical", "offensive", "unknown"}
DEFAULT_MARKDOWN_TOP_FINDINGS = 15
DEFAULT_MARKDOWN_TOP_SKILLS = 20
@dataclass(frozen=True)
class Finding:
    """One immutable audit finding for a single skill."""

    severity: str  # "error", "warning", or "info"
    code: str      # machine-readable finding identifier
    message: str   # human-readable explanation

    def to_dict(self) -> dict[str, str]:
        """Return a JSON-serializable mapping of this finding's fields."""
        return {key: getattr(self, key) for key in ("severity", "code", "message")}
def has_examples(content: str) -> bool:
    """Report whether *content* shows example material.

    A fenced code block or any recognized example/usage heading counts.
    """
    if FENCED_CODE_BLOCK_PATTERN.search(content):
        return True
    return any(pattern.search(content) for pattern in EXAMPLES_HEADING_PATTERNS)
def has_limitations(content: str) -> bool:
    """Report whether *content* contains a recognized limitations-style heading."""
    for pattern in LIMITATIONS_HEADING_PATTERNS:
        if pattern.search(content):
            return True
    return False
def find_dangling_links(content: str, skill_root: Path) -> list[str]:
    """Return raw markdown link targets that point at missing local files.

    Fragment-only links, external targets (http/https/mailto and angle
    brackets), and absolute paths are ignored; remaining targets are
    resolved relative to *skill_root* and reported when nothing exists
    at that location.
    """
    missing: list[str] = []
    for raw_target in MARKDOWN_LINK_PATTERN.findall(content):
        path_part = raw_target.split("#", 1)[0].strip()
        if not path_part:
            continue
        if path_part.startswith(("http://", "https://", "mailto:", "<", ">")):
            continue
        if os.path.isabs(path_part):
            continue
        if not (skill_root / path_part).resolve().exists():
            missing.append(raw_target)
    return missing
def build_skill_report(skill_root: Path, skills_dir: Path) -> dict[str, object]:
    """Audit one skill directory and return its per-skill report dict.

    Checks, in order: symlink/readability guards, frontmatter integrity
    (name, description, risk, source, date_added), presence of the
    "When to Use"/examples/limitations sections, document length,
    relative link targets, and the mandatory disclaimer for offensive
    skills. Short-circuits early when the file cannot be audited at all.
    """
    skill_file = skill_root / "SKILL.md"
    rel_dir = skill_root.relative_to(skills_dir).as_posix()
    rel_file = f"{rel_dir}/SKILL.md"
    findings: list[Finding] = []
    # Do not follow symlinks: the target may live outside the audited tree.
    if skill_file.is_symlink():
        findings.append(
            Finding(
                "warning",
                "symlinked_skill_markdown",
                "SKILL.md is a symlink and was not audited for safety or usability.",
            )
        )
        return finalize_skill_report(rel_dir, rel_file, findings)
    try:
        content = skill_file.read_text(encoding="utf-8")
    except Exception as exc:  # pragma: no cover - defensive guard
        findings.append(Finding("error", "unreadable_file", f"Unable to read SKILL.md: {exc}"))
        return finalize_skill_report(rel_dir, rel_file, findings)
    metadata, fm_errors = parse_frontmatter(content, rel_file)
    if metadata is None:
        # Without frontmatter none of the metadata checks below can run.
        findings.append(Finding("error", "invalid_frontmatter", "Missing or malformed YAML frontmatter."))
        return finalize_skill_report(rel_dir, rel_file, findings)
    for error in fm_errors:
        findings.append(Finding("error", "invalid_frontmatter", error))
    name = metadata.get("name")
    description = metadata.get("description")
    risk = metadata.get("risk")
    source = metadata.get("source")
    date_added = metadata.get("date_added")
    # The folder name is the canonical skill id; frontmatter must agree.
    if name != skill_root.name:
        findings.append(
            Finding(
                "error",
                "name_mismatch",
                f"Frontmatter name '{name}' does not match folder name '{skill_root.name}'.",
            )
        )
    if description is None:
        findings.append(Finding("error", "missing_description", "Missing frontmatter description."))
    elif not isinstance(description, str):
        findings.append(
            Finding(
                "error",
                "invalid_description_type",
                f"Description must be a string, got {type(description).__name__}.",
            )
        )
    else:
        stripped_description = description.strip()
        if not stripped_description:
            findings.append(Finding("error", "empty_description", "Description is empty or whitespace only."))
        if len(description) > 300:
            findings.append(
                Finding(
                    "error",
                    "description_too_long",
                    f"Description is {len(description)} characters long; keep it concise.",
                )
            )
        # A trailing "..."/"…" marks the truncated descriptions issue #365 targets.
        if ELLIPSIS_PATTERN.search(stripped_description):
            findings.append(
                Finding(
                    "warning",
                    "description_truncated",
                    "Description ends with an ellipsis and likely needs cleanup for issue #365.",
                )
            )
    if risk is None:
        findings.append(Finding("warning", "missing_risk", "Missing risk classification."))
    elif risk not in VALID_RISK_LEVELS:
        findings.append(
            Finding(
                "error",
                "invalid_risk",
                f"Risk must be one of {sorted(VALID_RISK_LEVELS)}, got '{risk}'.",
            )
        )
    if source is None:
        findings.append(Finding("warning", "missing_source", "Missing source attribution."))
    # date_added is optional, but when present it must be ISO YYYY-MM-DD.
    if date_added is not None and not DATE_PATTERN.match(str(date_added)):
        findings.append(
            Finding(
                "error",
                "invalid_date_added",
                f"date_added must use YYYY-MM-DD format, got '{date_added}'.",
            )
        )
    if not has_when_to_use_section(content):
        findings.append(Finding("warning", "missing_when_to_use", "Missing a recognized 'When to Use' section."))
    if not has_examples(content):
        findings.append(Finding("warning", "missing_examples", "Missing an example section or fenced example block."))
    if not has_limitations(content):
        findings.append(Finding("warning", "missing_limitations", "Missing a limitations/constraints section."))
    line_count = content.count("\n") + 1
    if line_count > 500:
        findings.append(
            Finding(
                "warning",
                "skill_too_long",
                f"SKILL.md is {line_count} lines long; consider splitting into references/.",
            )
        )
    for broken_link in find_dangling_links(content, skill_root):
        findings.append(
            Finding(
                "error",
                "dangling_link",
                f"Broken relative markdown link: {broken_link}",
            )
        )
    # Offensive-risk skills must carry the explicit usage disclaimer.
    if risk == "offensive" and not SECURITY_DISCLAIMER_PATTERN.search(content):
        findings.append(
            Finding(
                "error",
                "missing_authorized_use_only",
                "Offensive skill is missing the required 'AUTHORIZED USE ONLY' disclaimer.",
            )
        )
    return finalize_skill_report(rel_dir, rel_file, findings)
def finalize_skill_report(skill_id: str, rel_file: str, findings: list[Finding]) -> dict[str, object]:
    """Assemble the per-skill report dict from collected findings.

    Status is "error" when any error-severity finding exists, otherwise
    "warning" when any warning exists, otherwise "ok".
    """
    tally = Counter(finding.severity for finding in findings)
    if tally["error"]:
        status = "error"
    elif tally["warning"]:
        status = "warning"
    else:
        status = "ok"
    return {
        "id": skill_id,
        "path": rel_file,
        "status": status,
        "error_count": tally["error"],
        "warning_count": tally["warning"],
        "info_count": tally["info"],
        "findings": [finding.to_dict() for finding in findings],
    }
def audit_skills(skills_dir: str | Path) -> dict[str, object]:
    """Audit every SKILL.md under *skills_dir* and return the full report.

    Hidden directories are pruned from the walk. The result carries a
    UTC timestamp, the scanned root, aggregate summary counters, and the
    per-skill reports sorted case-insensitively by skill id.
    """
    configure_utf8_output()
    root_path = Path(skills_dir).resolve()
    skill_reports: list[dict[str, object]] = []
    for current_dir, subdirs, filenames in os.walk(root_path):
        # In-place pruning stops os.walk from descending into hidden dirs.
        subdirs[:] = [name for name in subdirs if not name.startswith(".")]
        if "SKILL.md" in filenames:
            skill_reports.append(build_skill_report(Path(current_dir), root_path))
    skill_reports.sort(key=lambda item: str(item["id"]).lower())
    by_code: Counter = Counter()
    by_severity: Counter = Counter()
    for item in skill_reports:
        for finding in item["findings"]:
            by_code[finding["code"]] += 1
            by_severity[finding["severity"]] += 1
    statuses = [item["status"] for item in skill_reports]
    summary = {
        "skills_scanned": len(skill_reports),
        "skills_ok": statuses.count("ok"),
        "skills_with_errors": statuses.count("error"),
        "skills_with_warnings_only": statuses.count("warning"),
        "errors": by_severity["error"],
        "warnings": by_severity["warning"],
        "infos": by_severity["info"],
        "top_finding_codes": [
            {"code": code, "count": count} for code, count in by_code.most_common()
        ],
    }
    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "skills_dir": str(root_path),
        "summary": summary,
        "skills": skill_reports,
    }
def write_markdown_report(report: dict[str, object], destination: str | Path) -> None:
    """Render *report* as a concise Markdown summary and write it to *destination*.

    Only the top finding codes and the first non-ok skills are listed,
    capped by the module-level DEFAULT_MARKDOWN_TOP_* limits.
    """
    summary = report["summary"]
    codes = summary["top_finding_codes"][:DEFAULT_MARKDOWN_TOP_FINDINGS]
    flagged = [entry for entry in report["skills"] if entry["status"] != "ok"]
    flagged = flagged[:DEFAULT_MARKDOWN_TOP_SKILLS]
    out: list[str] = []
    out.append("# Skills Audit Report")
    out.append("")
    out.append(f"Generated at: `{report['generated_at']}`")
    out.append("")
    out.append("## Summary")
    out.append("")
    out.append(f"- Skills scanned: **{summary['skills_scanned']}**")
    out.append(f"- Skills ready: **{summary['skills_ok']}**")
    out.append(f"- Skills with errors: **{summary['skills_with_errors']}**")
    out.append(f"- Skills with warnings only: **{summary['skills_with_warnings_only']}**")
    out.append(f"- Total errors: **{summary['errors']}**")
    out.append(f"- Total warnings: **{summary['warnings']}**")
    out.append("")
    out.append("## Top Finding Codes")
    out.append("")
    out.append("| Code | Count |")
    out.append("| --- | ---: |")
    if codes:
        for item in codes:
            out.append(f"| `{item['code']}` | {item['count']} |")
    else:
        out.append("| _none_ | 0 |")
    out.append("")
    out.append("## Skills Needing Attention")
    out.append("")
    out.append("| Skill | Status | Errors | Warnings |")
    out.append("| --- | --- | ---: | ---: |")
    if flagged:
        for entry in flagged:
            out.append(
                f"| `{entry['id']}` | {entry['status']} | {entry['error_count']} | {entry['warning_count']} |"
            )
    else:
        out.append("| _none_ | ok | 0 | 0 |")
    Path(destination).write_text("\n".join(out) + "\n", encoding="utf-8")
def print_summary(report: dict[str, object]) -> None:
    """Print a human-readable console summary of the audit *report*."""
    summary = report["summary"]
    print("🔎 Skills audit completed")
    for label, key in (
        ("Skills scanned", "skills_scanned"),
        ("Ready", "skills_ok"),
        ("Warning only", "skills_with_warnings_only"),
        ("With errors", "skills_with_errors"),
        ("Total warnings", "warnings"),
        ("Total errors", "errors"),
    ):
        print(f"   {label}: {summary[key]}")
    leaders = summary["top_finding_codes"][:10]
    if leaders:
        print("   Top findings:")
        for item in leaders:
            print(f"   - {item['code']}: {item['count']}")
def main() -> int:
    """CLI entry point: audit all skills and optionally write reports.

    Returns the process exit code: 1 when any error finding exists (or,
    with --strict, when any warning exists), otherwise 0.
    """
    parser = argparse.ArgumentParser(description="Audit every SKILL.md for conformance and baseline usability.")
    parser.add_argument(
        "--json-out",
        help="Write the full machine-readable audit report to this path.",
    )
    parser.add_argument(
        "--markdown-out",
        help="Write a concise Markdown summary to this path.",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit with code 1 when warnings are present, not only errors.",
    )
    args = parser.parse_args()
    repo_root = find_repo_root(__file__)
    report = audit_skills(repo_root / "skills")
    print_summary(report)
    if args.json_out:
        Path(args.json_out).write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
        print(f"📝 Wrote JSON audit report to {args.json_out}")
    if args.markdown_out:
        write_markdown_report(report, args.markdown_out)
        print(f"📝 Wrote Markdown audit report to {args.markdown_out}")
    summary = report["summary"]
    # Errors always fail the run; warnings fail only under --strict.
    if summary["errors"] > 0:
        return 1
    if args.strict and summary["warnings"] > 0:
        return 1
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,139 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import subprocess
import sys
from pathlib import Path
from _project_paths import find_repo_root
from fix_missing_skill_sections import (
build_examples_section,
build_when_section,
has_examples,
has_when_to_use_section,
)
from validate_skills import configure_utf8_output, parse_frontmatter
def get_head_content(repo_root: Path, relative_path: Path) -> str | None:
    """Return the committed (HEAD) content of *relative_path*, or None.

    None is returned when `git show` fails, e.g. for files that are new
    in the working tree and therefore absent from HEAD.
    """
    completed = subprocess.run(
        ["git", "show", f"HEAD:{relative_path.as_posix()}"],
        cwd=repo_root,
        capture_output=True,
        text=True,
        check=False,
    )
    return completed.stdout if completed.returncode == 0 else None
def remove_exact_section(content: str, section_text: str) -> str:
    """Delete one exact occurrence of *section_text* from *content*.

    The match must be followed by another heading or the end of the
    document, so fragments embedded inside other sections are left
    alone. Surplus blank lines are collapsed afterwards and the result
    always ends with exactly one trailing newline.
    """
    target = re.escape(section_text.strip())
    boundary = r"(?=\n##\s|\n#\s|\Z)"
    result = content
    for candidate in (
        re.compile(rf"\n\n{target}\n{boundary}", re.DOTALL),
        re.compile(rf"\n{target}\n{boundary}", re.DOTALL),
    ):
        result, replaced = candidate.subn("\n", result, count=1)
        if replaced:
            break
    result = re.sub(r"\n{3,}", "\n\n", result)
    return result.rstrip() + "\n"
def cleanup_skill_file(repo_root: Path, skill_path: Path) -> tuple[bool, list[str]]:
    """Remove synthetic sections from one SKILL.md when HEAD lacked them.

    A generated "When to Use" or "Examples" section is removed only when
    (a) the exact generated text is still present in the working copy and
    (b) the committed (HEAD) version of the file did not already contain
    such a section — i.e. the section was synthesized, not hand-written.
    Returns (changed, change_labels).
    """
    current_content = skill_path.read_text(encoding="utf-8")
    metadata, _ = parse_frontmatter(current_content, skill_path.as_posix())
    if not metadata:
        return False, []
    description = metadata.get("description")
    if not isinstance(description, str):
        # The generators below need a string description to reproduce the
        # synthetic section text exactly.
        return False, []
    relative_path = skill_path.relative_to(repo_root)
    head_content = get_head_content(repo_root, relative_path)
    if head_content is None:
        # File absent from HEAD (e.g. brand new) - nothing to compare against.
        return False, []
    skill_name = str(metadata.get("name") or skill_path.parent.name)
    # Re-generate the synthetic sections so only exact matches are removed.
    generated_when = build_when_section(skill_name, description)
    generated_examples = build_examples_section(skill_name, description)
    updated = current_content
    changes: list[str] = []
    if generated_when in updated and not has_when_to_use_section(head_content):
        updated = remove_exact_section(updated, generated_when)
        changes.append("removed_synthetic_when_to_use")
    if generated_examples in updated and not has_examples(head_content):
        updated = remove_exact_section(updated, generated_examples)
        changes.append("removed_synthetic_examples")
    if updated != current_content:
        skill_path.write_text(updated, encoding="utf-8")
        return True, changes
    return False, []
def main() -> int:
    """CLI entry point: scan all skills and strip synthetic sections.

    In --dry-run mode the detection half of cleanup_skill_file is
    mirrored inline so candidates can be reported without writing.
    NOTE(review): keep this preview logic in sync with cleanup_skill_file.
    """
    configure_utf8_output()
    parser = argparse.ArgumentParser(description="Remove synthetic generic sections previously generated from descriptions.")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing files.")
    args = parser.parse_args()
    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"
    modified = 0
    for root, dirs, files in os.walk(skills_dir):
        # Prune hidden directories (e.g. .git) from the walk in place.
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue
        skill_path = Path(root) / "SKILL.md"
        current_content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(current_content, skill_path.as_posix())
        if not metadata or not isinstance(metadata.get("description"), str):
            continue
        relative_path = skill_path.relative_to(repo_root)
        head_content = get_head_content(repo_root, relative_path)
        if head_content is None:
            continue
        skill_name = str(metadata.get("name") or skill_path.parent.name)
        # Reproduce the synthetic sections to detect exact-match candidates.
        generated_when = build_when_section(skill_name, metadata["description"])
        generated_examples = build_examples_section(skill_name, metadata["description"])
        changes: list[str] = []
        if generated_when in current_content and not has_when_to_use_section(head_content):
            changes.append("removed_synthetic_when_to_use")
        if generated_examples in current_content and not has_examples(head_content):
            changes.append("removed_synthetic_examples")
        if not changes:
            continue
        if args.dry_run:
            modified += 1
            print(f"FIX {relative_path} [{', '.join(changes)}]")
            continue
        changed, actual_changes = cleanup_skill_file(repo_root, skill_path)
        if changed:
            modified += 1
            print(f"FIX {relative_path} [{', '.join(actual_changes)}]")
    print(f"\nModified: {modified}")
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,202 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import sys
from pathlib import Path
from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, has_when_to_use_section, parse_frontmatter
FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
WHEN_SECTION_VARIANT_PATTERNS = [
(re.compile(r"^##\s*when to apply\s*$", re.MULTILINE | re.IGNORECASE), "## When to Use"),
(re.compile(r"^##\s*when to activate\s*$", re.MULTILINE | re.IGNORECASE), "## When to Use"),
(re.compile(r"^##\s*\d+[.)]?\s*when to use(?: this skill)?\s*$", re.MULTILINE | re.IGNORECASE), "## When to Use"),
(re.compile(r"^##\s*when to use\s*$", re.MULTILINE | re.IGNORECASE), "## When to Use"),
]
EXAMPLES_HEADING_PATTERN = re.compile(r"^##\s+Example(s)?\b", re.MULTILINE | re.IGNORECASE)
USAGE_HEADING_PATTERN = re.compile(r"^##\s+Usage\b", re.MULTILINE | re.IGNORECASE)
FENCED_CODE_BLOCK_PATTERN = re.compile(r"^```", re.MULTILINE)
MULTISPACE_PATTERN = re.compile(r"\s+")
def has_examples(content: str) -> bool:
    """Report whether *content* already carries example material.

    Any fenced code block, "## Example(s)" heading, or "## Usage" heading
    counts.
    """
    checks = (FENCED_CODE_BLOCK_PATTERN, EXAMPLES_HEADING_PATTERN, USAGE_HEADING_PATTERN)
    return any(pattern.search(content) is not None for pattern in checks)
def normalize_whitespace(text: str) -> str:
    """Trim *text* and collapse internal whitespace runs to single spaces."""
    trimmed = text.strip()
    return MULTISPACE_PATTERN.sub(" ", trimmed)
def normalize_when_heading_variants(content: str) -> str:
    """Rewrite known '## When to …' heading variants to '## When to Use'."""
    result = content
    for variant, canonical in WHEN_SECTION_VARIANT_PATTERNS:
        result = variant.sub(canonical, result)
    return result
def normalize_description_for_prompt(description: str) -> str:
    """Canonicalize a description into 'Use this skill when …' phrasing.

    Trailing periods are dropped and a few known lead-in phrases are
    rewritten (first match wins); the remainder keeps its original casing.
    """
    text = normalize_whitespace(description).rstrip(".")
    rewritable_prefixes = (
        "this skill should be used when ",
        "always use this skill when ",
        "use when ",
    )
    lowered = text.lower()
    for prefix in rewritable_prefixes:
        if lowered.startswith(prefix):
            text = "Use this skill when " + text[len(prefix):]
            break
    return text
def build_when_section(skill_name: str, description: str) -> str:
    """Build a '## When to Use' section from the frontmatter description.

    *skill_name* is accepted for signature parity with
    build_examples_section but is not used in the generated text.
    """
    normalized = normalize_description_for_prompt(description)
    if normalized.lower().startswith("use this skill when "):
        # Already canonical phrasing; just ensure a capitalized sentence.
        sentence = normalized[0].upper() + normalized[1:]
    else:
        # NOTE: a former 'use when ' branch here was unreachable -
        # normalize_description_for_prompt rewrites that prefix before we
        # ever see it - so it has been removed.
        sentence = f"Use this skill when the task matches this description: {normalized}."
    return "\n".join(
        [
            "## When to Use",
            f"- {sentence}",
        ]
    )
def build_examples_section(skill_name: str, description: str) -> str:
    """Build a generic '## Examples' section referencing the skill by name."""
    summary = normalize_whitespace(description).rstrip(".")
    section_lines = [
        "## Examples",
        "```text",
        f"Use @{skill_name} for this task: {summary}.",
        "",
        "Apply the skill to my current work and walk me through the safest next steps,",
        "key checks, and the concrete output I should produce.",
        "```",
    ]
    return "\n".join(section_lines)
def find_insert_after_intro(content: str) -> int:
    """Return the offset where a new section can go, after frontmatter and intro.

    This is the position of the first '## ' heading that follows the YAML
    frontmatter, or the end of the document when no such heading exists.
    """
    fm_match = FRONTMATTER_PATTERN.search(content)
    offset = fm_match.end() if fm_match else 0
    heading = re.search(r"^##\s+", content[offset:], re.MULTILINE)
    if heading is None:
        return len(content)
    return offset + heading.start()
def insert_section_after_intro(content: str, section_text: str) -> str:
    """Insert *section_text* after the intro, before the first '## ' heading."""
    cut = find_insert_after_intro(content)
    head = content[:cut].rstrip() + "\n\n"
    tail = content[cut:].lstrip()
    if not tail:
        return head + section_text + "\n"
    return head + section_text + "\n\n" + tail
def append_section(content: str, section_text: str) -> str:
    """Append *section_text* to *content*, separated by one blank line."""
    body = content.rstrip()
    return f"{body}\n\n{section_text}\n"
def update_skill_file(skill_path: Path, *, add_missing: bool = False) -> tuple[bool, list[str]]:
    """Normalize heading variants in one SKILL.md and optionally add sections.

    Returns (changed, change_labels). Sections are only synthesized when
    *add_missing* is true and the frontmatter description is a usable
    string. The file is rewritten only when its content actually changes.
    """
    content = skill_path.read_text(encoding="utf-8")
    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, []
    updated = normalize_when_heading_variants(content)
    changes: list[str] = []
    # BUGFIX: record the normalization label regardless of the description
    # type; previously a normalized file with a non-string description was
    # written but reported with an empty change list.
    if updated != content:
        changes.append("normalized_when_heading")
    description = metadata.get("description")
    skill_name = str(metadata.get("name") or skill_path.parent.name)
    if isinstance(description, str):
        if add_missing and not has_when_to_use_section(updated):
            updated = insert_section_after_intro(updated, build_when_section(skill_name, description))
            changes.append("added_when_to_use")
        if add_missing and not has_examples(updated):
            updated = append_section(updated, build_examples_section(skill_name, description))
            changes.append("added_examples")
    if updated != content:
        skill_path.write_text(updated, encoding="utf-8")
        return True, changes
    return False, changes
def main() -> int:
    """CLI entry point: normalize headings and optionally add sections.

    In --dry-run mode the effect of update_skill_file is simulated so
    changes can be reported without writing files.
    NOTE(review): keep the simulation in sync with update_skill_file.
    """
    configure_utf8_output()
    parser = argparse.ArgumentParser(description="Normalize skill section headings and optionally add missing sections.")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing files.")
    parser.add_argument(
        "--add-missing",
        action="store_true",
        help="Also synthesize missing 'When to Use' and 'Examples' sections from the description.",
    )
    args = parser.parse_args()
    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"
    modified = 0
    for root, dirs, files in os.walk(skills_dir):
        # Prune hidden directories from the walk in place.
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue
        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        if not metadata or not isinstance(metadata.get("description"), str):
            continue
        # Simulate the heading normalization to decide whether work is needed.
        simulated = normalize_when_heading_variants(content)
        needs_when = args.add_missing and not has_when_to_use_section(simulated)
        needs_examples = args.add_missing and not has_examples(simulated)
        if not needs_when and not needs_examples and simulated == content:
            continue
        if args.dry_run:
            change_labels: list[str] = []
            if simulated != content:
                change_labels.append("normalized_when_heading")
            if needs_when:
                change_labels.append("added_when_to_use")
            if needs_examples:
                change_labels.append("added_examples")
            modified += 1
            print(f"FIX {skill_path.relative_to(repo_root)} [{', '.join(change_labels)}]")
            continue
        changed, changes = update_skill_file(skill_path, add_missing=args.add_missing)
        if changed:
            modified += 1
            print(f"FIX {skill_path.relative_to(repo_root)} [{', '.join(changes)}]")
    print(f"\nModified: {modified}")
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,250 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import sys
from pathlib import Path
from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, parse_frontmatter
ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$")
MAX_DESCRIPTION_LENGTH = 300
MIN_PARAGRAPH_LENGTH = 40
TOP_LEVEL_KEY_PATTERN = re.compile(r"^[A-Za-z0-9_-]+:\s*")
FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
MARKDOWN_DECORATION_PATTERN = re.compile(r"[*_`]+")
HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
MULTISPACE_PATTERN = re.compile(r"\s+")
def strip_frontmatter(content: str) -> str:
    """Return the markdown body with any leading YAML frontmatter removed."""
    fm = FRONTMATTER_PATTERN.search(content)
    if fm is None:
        return content
    return content[fm.end():].lstrip()
def normalize_text(text: str) -> str:
    """Flatten markdown/HTML decoration and whitespace into plain prose.

    Strips leading blockquote markers, emphasis/backtick characters, and
    HTML tags, then collapses whitespace runs into single spaces.
    """
    cleaned = re.sub(r"^\s*>+\s?", "", text.strip())
    cleaned = MARKDOWN_DECORATION_PATTERN.sub("", cleaned)
    cleaned = HTML_TAG_PATTERN.sub("", cleaned)
    cleaned = MULTISPACE_PATTERN.sub(" ", cleaned)
    return cleaned.strip()
def split_candidate_paragraphs(body: str) -> list[str]:
    """Split a markdown body into normalized prose paragraphs.

    Code fences and their contents, headings, table rows, and list items
    are skipped; consecutive plain-text lines are joined into a single
    paragraph. Returns the normalized, non-empty paragraphs in document
    order.
    """
    numbered_item = re.compile(r"\d+\.\s")
    paragraphs: list[str] = []
    current: list[str] = []
    in_code_block = False

    def flush() -> None:
        # Close out the paragraph accumulated so far, if any.
        if current:
            paragraphs.append(" ".join(current))
            current.clear()

    for raw_line in body.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("```"):
            in_code_block = not in_code_block
            flush()
            continue
        if in_code_block:
            continue
        if not stripped or stripped.startswith("#"):
            flush()
            continue
        # BUGFIX: numbered-list detection previously only matched "1. "
        # through "5. ", so longer lists leaked into candidate paragraphs;
        # now any "N. " item is treated as a list line.
        if stripped.startswith(("- ", "* ", "|")) or numbered_item.match(stripped):
            flush()
            continue
        current.append(stripped)
    flush()
    return [normalize_text(paragraph) for paragraph in paragraphs if normalize_text(paragraph)]
def is_usable_paragraph(paragraph: str) -> bool:
    """Decide whether *paragraph* can serve as a description candidate.

    Too-short paragraphs and known structural/boilerplate lead-ins are
    rejected.
    """
    if len(paragraph) < MIN_PARAGRAPH_LENGTH:
        return False
    lowered = paragraph.lower()
    structural_prefixes = ("role:", "works well with:", "capabilities:", "patterns:", "anti-patterns:")
    if lowered.startswith(structural_prefixes):
        return False
    return not lowered.startswith("this skill is applicable to execute the workflow")
def normalize_for_match(text: str) -> str:
    """Reduce *text* to lowercase ASCII alphanumerics for fuzzy comparison."""
    lowered = text.lower()
    return re.sub(r"[^a-z0-9]+", "", lowered)
def pick_candidate(description: str, body: str) -> str | None:
    """Choose the body paragraph that best completes a truncated description.

    Prefers a paragraph that starts with, or contains, the truncated
    description prefix (compared alphanumerically); otherwise falls back
    to the first usable paragraph. Returns None when nothing usable exists.
    """
    usable = [p for p in split_candidate_paragraphs(body) if is_usable_paragraph(p)]
    if not usable:
        return None
    prefix = normalize_for_match(ELLIPSIS_PATTERN.sub("", description).strip())
    if prefix:
        for paragraph in usable:
            candidate = normalize_for_match(paragraph)
            if candidate.startswith(prefix) or prefix in candidate:
                return paragraph
    return usable[0]
def clamp_description(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str:
    """Normalize *text* and shorten it to at most *max_length* characters.

    Prefers cutting at the last sentence boundary inside the limit, then
    at the last word boundary, and only hard-truncates as a last resort.
    """
    text = normalize_text(text)
    if len(text) <= max_length:
        return text
    sentence_end = max(
        text.rfind(marker, 0, max_length + 1) for marker in (". ", "! ", "? ")
    )
    if sentence_end != -1:
        return text[: sentence_end + 1].strip()
    word_end = text.rfind(" ", 0, max_length + 1)
    if word_end == -1:
        return text[:max_length].strip()
    return text[:word_end].strip()
def escape_yaml_string(text: str) -> str:
    """Escape backslashes and double quotes for a double-quoted YAML scalar."""
    escaped = text.replace("\\", "\\\\")
    return escaped.replace('"', '\\"')
def replace_description(frontmatter_text: str, new_description: str) -> str:
    """Replace the description field inside raw YAML frontmatter text.

    The replacement is written as a double-quoted single-line scalar.
    Continuation lines of a multi-line description (block scalars,
    wrapped text) are consumed by scanning until the next "key:" line at
    the same or lower indentation.

    Raises:
        ValueError: when no description field is present.
    """
    lines = frontmatter_text.splitlines()
    replacement = f'description: "{escape_yaml_string(new_description)}"'
    for index, line in enumerate(lines):
        if not re.match(r"^\s*description:\s*", line):
            continue
        current_indent = len(line) - len(line.lstrip(" "))
        # Walk past continuation lines that belong to the description value.
        end_index = index + 1
        while end_index < len(lines):
            candidate = lines[end_index]
            stripped = candidate.strip()
            candidate_indent = len(candidate) - len(candidate.lstrip(" "))
            if not stripped:
                # Blank lines can occur inside block scalars; keep scanning.
                end_index += 1
                continue
            if candidate_indent <= current_indent and TOP_LEVEL_KEY_PATTERN.match(stripped):
                break
            end_index += 1
        updated = lines[:index] + [replacement] + lines[end_index:]
        return "\n".join(updated)
    raise ValueError("Description field not found in frontmatter.")
def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    """Repair one SKILL.md whose description ends in an ellipsis.

    Picks a replacement paragraph from the document body, clamps it to
    the length limit, and rewrites the frontmatter in place. Returns
    (changed, new_description); (False, None) when nothing was fixed.
    """
    content = skill_path.read_text(encoding="utf-8")
    match = FRONTMATTER_PATTERN.search(content)
    if not match:
        return False, None
    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, None
    description = metadata.get("description")
    # Only descriptions that visibly end with "..." / "…" are candidates.
    if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
        return False, None
    candidate = pick_candidate(description, strip_frontmatter(content))
    if not candidate:
        return False, None
    new_description = clamp_description(candidate)
    # Skip no-op rewrites so files are not touched needlessly.
    if not new_description or new_description == normalize_text(description):
        return False, None
    updated_frontmatter = replace_description(match.group(1), new_description)
    updated_content = f"---\n{updated_frontmatter}\n---{content[match.end():]}"
    if updated_content == content:
        return False, None
    skill_path.write_text(updated_content, encoding="utf-8")
    return True, new_description
def main() -> int:
    """CLI entry point: find and repair truncated frontmatter descriptions.

    In --dry-run mode the candidate selection is previewed without
    writing files.
    NOTE(review): keep the preview logic in sync with update_skill_file.
    """
    configure_utf8_output()
    parser = argparse.ArgumentParser(description="Repair truncated SKILL.md frontmatter descriptions.")
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()
    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"
    fixed = 0
    skipped = 0
    for root, dirs, files in os.walk(skills_dir):
        # Prune hidden directories from the walk in place.
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue
        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        # Only ellipsis-terminated string descriptions are candidates.
        if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
            continue
        candidate = pick_candidate(description, strip_frontmatter(content))
        if not candidate:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")
            continue
        new_description = clamp_description(candidate)
        if args.dry_run:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}")
            continue
        changed, _ = update_skill_file(skill_path)
        if changed:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)}")
        else:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")
    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -18,6 +18,10 @@ const LOCAL_TEST_COMMANDS = [
[path.join(TOOL_TESTS, "workflow_contracts.test.js")],
[path.join(TOOL_TESTS, "docs_security_content.test.js")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_bundle_activation_security.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_audit_skills.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_cleanup_synthetic_skill_sections.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_missing_skill_sections.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_truncated_descriptions.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_sync_microsoft_skills_security.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_validate_skills_headings.py")],
];

View File

@@ -0,0 +1,142 @@
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path
# Make the repo's tools/scripts directory importable so the audited modules
# can resolve their sibling imports (validate_skills, _project_paths, ...).
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))


def load_module(relative_path: str, module_name: str):
    """Load a script by repo-relative path and register it in sys.modules."""
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    # Register before exec_module, following the importlib loading recipe.
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module


# Module under test, loaded by file path rather than package import.
audit_skills = load_module("tools/scripts/audit_skills.py", "audit_skills")
class AuditSkillsTests(unittest.TestCase):
    """End-to-end checks of audit_skills.audit_skills on temp skill trees."""

    def test_audit_marks_complete_skill_as_ok(self):
        """A fully populated skill yields status 'ok' and zero findings."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            skills_dir = root / "skills"
            skill_dir = skills_dir / "good-skill"
            skill_dir.mkdir(parents=True)
            (skill_dir / "SKILL.md").write_text(
                """---
name: good-skill
description: Useful and complete skill description
risk: safe
source: self
date_added: 2026-03-20
---
# Good Skill
## When to Use
- Use when the user needs a solid example.
## Examples
```bash
echo "hello"
```
## Limitations
- Demo only.
""",
                encoding="utf-8",
            )
            report = audit_skills.audit_skills(skills_dir)
            self.assertEqual(report["summary"]["skills_scanned"], 1)
            self.assertEqual(report["summary"]["skills_ok"], 1)
            self.assertEqual(report["summary"]["warnings"], 0)
            self.assertEqual(report["summary"]["errors"], 0)
            self.assertEqual(report["skills"][0]["status"], "ok")

    def test_audit_flags_truncated_description_and_missing_sections(self):
        """An ellipsis-terminated description and missing sections warn."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            skills_dir = root / "skills"
            skill_dir = skills_dir / "truncated-skill"
            skill_dir.mkdir(parents=True)
            (skill_dir / "SKILL.md").write_text(
                """---
name: truncated-skill
description: This description was cut off...
risk: safe
source: self
---
# Truncated Skill
## When to Use
- Use when reproducing issue 365.
""",
                encoding="utf-8",
            )
            report = audit_skills.audit_skills(skills_dir)
            finding_codes = {finding["code"] for finding in report["skills"][0]["findings"]}
            self.assertEqual(report["skills"][0]["status"], "warning")
            self.assertIn("description_truncated", finding_codes)
            self.assertIn("missing_examples", finding_codes)
            self.assertIn("missing_limitations", finding_codes)

    def test_audit_flags_blocking_errors(self):
        """Broken relative links and a missing disclaimer are errors."""
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            skills_dir = root / "skills"
            skill_dir = skills_dir / "offensive-skill"
            skill_dir.mkdir(parents=True)
            # missing.md exists so only missing-reference.md should be flagged.
            (skill_dir / "missing.md").write_text("# missing\n", encoding="utf-8")
            (skill_dir / "SKILL.md").write_text(
                """---
name: offensive-skill
description: Offensive example skill
risk: offensive
source: self
---
# Offensive Skill
## When to Use
- Use only in authorized environments.
## Examples
```bash
cat missing.md
```
See [details](missing-reference.md).
## Limitations
- Example only.
""",
                encoding="utf-8",
            )
            report = audit_skills.audit_skills(skills_dir)
            finding_codes = {finding["code"] for finding in report["skills"][0]["findings"]}
            self.assertEqual(report["skills"][0]["status"], "error")
            self.assertIn("dangling_link", finding_codes)
            self.assertIn("missing_authorized_use_only", finding_codes)
if __name__ == "__main__":
    # Allow running this test file directly as a script.
    unittest.main()

# ---- diff-viewer artifact: boundary of a new test file (hunk @@ -0,0 +1,162 @@) ----
import importlib.util
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path
from unittest import mock
# Repo root is three directory levels above this test file
# (presumably tools/tests/<suite>/ — TODO confirm against the repo layout).
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
# Put tools/scripts on sys.path so the scripts' sibling imports resolve.
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))
def load_module(relative_path: str, module_name: str):
    """Load a repo script file as a module registered under *module_name*.

    The module is inserted into ``sys.modules`` before execution so module
    lookups made during exec (e.g. by dataclasses or recursive imports)
    succeed, and the entry is removed again if execution fails so a
    half-initialized module is never left importable by later tests.
    """
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # spec_from_file_location returns None for unloadable paths; fail loudly
    # here instead of with an opaque AttributeError below.
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Don't leave a broken module behind in sys.modules.
        sys.modules.pop(module_name, None)
        raise
    return module
# Modules under test, loaded straight from their script files.
cleanup_synthetic_skill_sections = load_module(
    "tools/scripts/cleanup_synthetic_skill_sections.py",
    "cleanup_synthetic_skill_sections",
)
# Registered under a distinct sys.modules key so this copy does not collide
# with the one other test modules load as "fix_missing_skill_sections".
fix_missing_skill_sections = load_module(
    "tools/scripts/fix_missing_skill_sections.py",
    "fix_missing_skill_sections_for_cleanup_tests",
)
class CleanupSyntheticSkillSectionsTests(unittest.TestCase):
    """Tests for synthetic-section removal in cleanup_synthetic_skill_sections."""

    def test_remove_exact_section_preserves_other_content(self):
        document = """---
name: demo
description: Demo description.
---
# Demo
## When to Use
- Use this skill when demo work is needed.
## Examples
```text
Use @demo for this task:
foo
```
## Notes
Keep this section.
"""
        target = """## Examples
```text
Use @demo for this task:
foo
```"""
        result = cleanup_synthetic_skill_sections.remove_exact_section(document, target)
        self.assertNotIn("## Examples", result)
        self.assertIn("## Notes", result)
        self.assertIn("Keep this section.", result)

    def test_cleanup_skill_file_removes_only_generated_sections_missing_from_head(self):
        with tempfile.TemporaryDirectory() as tmp:
            project_root = Path(tmp)
            md_path = project_root / "skills" / "demo" / "SKILL.md"
            md_path.parent.mkdir(parents=True)
            summary = "Build and distribute Expo development clients locally or via TestFlight."
            synth_when = fix_missing_skill_sections.build_when_section("demo", summary)
            synth_examples = fix_missing_skill_sections.build_examples_section("demo", summary)
            md_path.write_text(
                f"""---
name: demo
description: {summary}
---
# Demo
{synth_when}
{synth_examples}
## Notes
Human-written content.
""",
                encoding="utf-8",
            )
            # HEAD lacks the generated sections, so both should be stripped.
            head_version = f"""---
name: demo
description: {summary}
---
# Demo
## Notes
Human-written content.
"""
            with mock.patch.object(
                cleanup_synthetic_skill_sections,
                "get_head_content",
                return_value=head_version,
            ):
                changed, changes = cleanup_synthetic_skill_sections.cleanup_skill_file(
                    project_root, md_path
                )
            result = md_path.read_text(encoding="utf-8")
            self.assertTrue(changed)
            self.assertEqual(
                changes,
                ["removed_synthetic_when_to_use", "removed_synthetic_examples"],
            )
            self.assertNotIn(synth_when, result)
            self.assertNotIn(synth_examples, result)
            self.assertIn("## Notes", result)

    def test_cleanup_skill_file_keeps_real_sections_that_already_existed_in_head(self):
        with tempfile.TemporaryDirectory() as tmp:
            project_root = Path(tmp)
            md_path = project_root / "skills" / "demo" / "SKILL.md"
            md_path.parent.mkdir(parents=True)
            summary = "Build and distribute Expo development clients locally or via TestFlight."
            synth_when = fix_missing_skill_sections.build_when_section("demo", summary)
            synth_examples = fix_missing_skill_sections.build_examples_section("demo", summary)
            original = f"""---
name: demo
description: {summary}
---
# Demo
{synth_when}
{synth_examples}
"""
            md_path.write_text(original, encoding="utf-8")
            # HEAD already contains the same sections, so nothing is synthetic.
            with mock.patch.object(
                cleanup_synthetic_skill_sections,
                "get_head_content",
                return_value=original,
            ):
                changed, changes = cleanup_synthetic_skill_sections.cleanup_skill_file(
                    project_root, md_path
                )
            self.assertFalse(changed)
            self.assertEqual(changes, [])
            self.assertEqual(md_path.read_text(encoding="utf-8"), original)
if __name__ == "__main__":
    # Allow running this test file directly as a script.
    unittest.main()

# ---- diff-viewer artifact: boundary of a new test file (hunk @@ -0,0 +1,127 @@) ----
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path
# Repo root is three directory levels above this test file
# (presumably tools/tests/<suite>/ — TODO confirm against the repo layout).
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
# Put tools/scripts on sys.path so the scripts' sibling imports resolve.
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))
def load_module(relative_path: str, module_name: str):
    """Load a repo script file as a module registered under *module_name*.

    The module is inserted into ``sys.modules`` before execution so module
    lookups made during exec (e.g. by dataclasses or recursive imports)
    succeed, and the entry is removed again if execution fails so a
    half-initialized module is never left importable by later tests.
    """
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # spec_from_file_location returns None for unloadable paths; fail loudly
    # here instead of with an opaque AttributeError below.
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Don't leave a broken module behind in sys.modules.
        sys.modules.pop(module_name, None)
        raise
    return module
# Module under test, loaded straight from its script file.
fix_missing_skill_sections = load_module(
    "tools/scripts/fix_missing_skill_sections.py",
    "fix_missing_skill_sections",
)
class FixMissingSkillSectionsTests(unittest.TestCase):
    """Tests for heading normalization and section insertion in fix_missing_skill_sections."""

    def test_normalizes_when_heading_variant(self):
        source = """---
name: demo
description: Demo description.
---
# Demo
## When to Activate
Activate this skill when:
- something happens
"""
        result = fix_missing_skill_sections.normalize_when_heading_variants(source)
        self.assertIn("## When to Use", result)
        self.assertNotIn("## When to Activate", result)

    def test_update_skill_file_adds_missing_sections(self):
        with tempfile.TemporaryDirectory() as tmp:
            md_path = Path(tmp) / "SKILL.md"
            md_path.write_text(
                """---
name: demo
description: Structured guide for setting up A/B tests with mandatory gates for hypothesis and metrics.
---
# Demo
Intro paragraph.
""",
                encoding="utf-8",
            )
            changed, changes = fix_missing_skill_sections.update_skill_file(
                md_path, add_missing=True
            )
            result = md_path.read_text(encoding="utf-8")
            self.assertTrue(changed)
            self.assertIn("added_when_to_use", changes)
            self.assertIn("added_examples", changes)
            self.assertIn("## When to Use", result)
            self.assertIn("## Examples", result)
            self.assertIn("Use @demo for this task:", result)

    def test_update_skill_file_only_adds_examples_when_when_section_exists(self):
        with tempfile.TemporaryDirectory() as tmp:
            md_path = Path(tmp) / "SKILL.md"
            md_path.write_text(
                """---
name: demo
description: Build and distribute Expo development clients locally or via TestFlight.
---
# Demo
## When to Use
- Use this skill when native Expo changes need a dev client.
""",
                encoding="utf-8",
            )
            changed, changes = fix_missing_skill_sections.update_skill_file(
                md_path, add_missing=True
            )
            result = md_path.read_text(encoding="utf-8")
            self.assertTrue(changed)
            # The pre-existing When to Use section must be left untouched.
            self.assertNotIn("added_when_to_use", changes)
            self.assertIn("added_examples", changes)
            self.assertEqual(result.count("## When to Use"), 1)
            self.assertIn("## Examples", result)

    def test_update_skill_file_defaults_to_normalization_only(self):
        with tempfile.TemporaryDirectory() as tmp:
            md_path = Path(tmp) / "SKILL.md"
            md_path.write_text(
                """---
name: demo
description: Demo description.
---
# Demo
## When to Activate
Activate this skill when:
- something happens
""",
                encoding="utf-8",
            )
            # Without add_missing=True only the heading variant is rewritten.
            changed, changes = fix_missing_skill_sections.update_skill_file(md_path)
            result = md_path.read_text(encoding="utf-8")
            self.assertTrue(changed)
            self.assertEqual(changes, ["normalized_when_heading"])
            self.assertIn("## When to Use", result)
            self.assertNotIn("## Examples", result)
if __name__ == "__main__":
    # Allow running this test file directly as a script.
    unittest.main()

# ---- diff-viewer artifact: boundary of a new test file (hunk @@ -0,0 +1,112 @@) ----
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path
# Repo root is three directory levels above this test file
# (presumably tools/tests/<suite>/ — TODO confirm against the repo layout).
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
# Put tools/scripts on sys.path so the scripts' sibling imports resolve.
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))
def load_module(relative_path: str, module_name: str):
    """Load a repo script file as a module registered under *module_name*.

    The module is inserted into ``sys.modules`` before execution so module
    lookups made during exec (e.g. by dataclasses or recursive imports)
    succeed, and the entry is removed again if execution fails so a
    half-initialized module is never left importable by later tests.
    """
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    # spec_from_file_location returns None for unloadable paths; fail loudly
    # here instead of with an opaque AttributeError below.
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Don't leave a broken module behind in sys.modules.
        sys.modules.pop(module_name, None)
        raise
    return module
# Module under test, loaded straight from its script file.
fix_truncated_descriptions = load_module(
    "tools/scripts/fix_truncated_descriptions.py",
    "fix_truncated_descriptions",
)
class FixTruncatedDescriptionsTests(unittest.TestCase):
    """Tests for candidate selection and rewriting in fix_truncated_descriptions."""

    def test_pick_candidate_prefers_matching_paragraph(self):
        truncated = "Master API design principles for resilient services..."
        body = """
# Heading
Master API design principles for resilient services and consistent developer experience.
Another paragraph.
"""
        chosen = fix_truncated_descriptions.pick_candidate(truncated, body)
        self.assertEqual(
            chosen,
            "Master API design principles for resilient services and consistent developer experience.",
        )

    def test_update_skill_file_rewrites_single_line_description(self):
        with tempfile.TemporaryDirectory() as tmp:
            md_path = Path(tmp) / "SKILL.md"
            md_path.write_text(
                """---
name: demo
description: "This description is truncated..."
risk: safe
source: self
---
# Demo
This skill helps you do something useful in a complete way.
""",
                encoding="utf-8",
            )
            changed, replacement = fix_truncated_descriptions.update_skill_file(md_path)
            self.assertTrue(changed)
            self.assertEqual(
                replacement,
                "This skill helps you do something useful in a complete way.",
            )
            self.assertIn(
                'description: "This skill helps you do something useful in a complete way."',
                md_path.read_text(encoding="utf-8"),
            )

    def test_update_skill_file_rewrites_block_scalar_description(self):
        with tempfile.TemporaryDirectory() as tmp:
            md_path = Path(tmp) / "SKILL.md"
            # NOTE(review): strict YAML block scalars need indented continuation
            # lines; this fixture presumably mirrors the repo's line-based
            # frontmatter parser — confirm against parse_frontmatter.
            md_path.write_text(
                """---
name: demo
description: |
Interact with calendar data and schedule meetings,
update events, or...
risk: safe
source: self
---
# Demo
Lightweight calendar automation with standalone OAuth authentication and event management commands.
""",
                encoding="utf-8",
            )
            changed, replacement = fix_truncated_descriptions.update_skill_file(md_path)
            self.assertTrue(changed)
            self.assertEqual(
                replacement,
                "Lightweight calendar automation with standalone OAuth authentication and event management commands.",
            )
            self.assertIn(
                'description: "Lightweight calendar automation with standalone OAuth authentication and event management commands."',
                md_path.read_text(encoding="utf-8"),
            )
if __name__ == "__main__":
    # Allow running this test file directly as a script.
    unittest.main()