meta(skills): Add skill audit and safe metadata fixes
Add repo-wide auditing and targeted repair scripts for skill metadata. Fix truncated descriptions automatically, keep heading normalization conservative, and remove the synthetic boilerplate sections that degrade editorial quality; regenerate the repo indexes and catalogs to match. Fixes #365
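For quick orientation, a hedged usage sketch (not part of the diff): it assumes the scripts run with tools/scripts on sys.path so that audit_skills and its validate_skills/_project_paths imports resolve, and it uses only names defined in this commit.

```python
# Hedged sketch: programmatic use of the new audit entry point.
# Assumes tools/scripts is importable and the cwd is the repo root.
from pathlib import Path

import audit_skills  # tools/scripts/audit_skills.py

report = audit_skills.audit_skills(Path("skills"))  # scan every SKILL.md under skills/
summary = report["summary"]
print(f"{summary['errors']} errors, {summary['warnings']} warnings")

# Same Markdown summary that the --markdown-out flag writes:
audit_skills.write_markdown_report(report, Path("skills-audit.md"))
```

The CLI equivalents, per the argparse definitions in the diffs below: `audit_skills.py [--strict] [--json-out PATH] [--markdown-out PATH]`, `fix_truncated_descriptions.py [--dry-run]`, `fix_missing_skill_sections.py [--dry-run] [--add-missing]`, and `cleanup_synthetic_skill_sections.py [--dry-run]`.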
389 tools/scripts/audit_skills.py Normal file
@@ -0,0 +1,389 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import os
import re
import sys
from collections import Counter
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path

from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, has_when_to_use_section, parse_frontmatter


ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$")
FENCED_CODE_BLOCK_PATTERN = re.compile(r"^```", re.MULTILINE)
EXAMPLES_HEADING_PATTERNS = [
    re.compile(r"^##\s+Example(s)?\b", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^##\s+Usage\b", re.MULTILINE | re.IGNORECASE),
]
LIMITATIONS_HEADING_PATTERNS = [
    re.compile(r"^##\s+Limitations?\b", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^##\s+Known\s+Limitations?\b", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^##\s+Constraints?\b", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^##\s+Out\s+of\s+Scope\b", re.MULTILINE | re.IGNORECASE),
    re.compile(r"^##\s+What\s+(This\s+Skill\s+)?Does(?:\s+Not|n't)\s+Do\b", re.MULTILINE | re.IGNORECASE),
]
MARKDOWN_LINK_PATTERN = re.compile(r"\[[^\]]*\]\(([^)]+)\)")
DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}$")
SECURITY_DISCLAIMER_PATTERN = re.compile(r"AUTHORIZED USE ONLY", re.IGNORECASE)
VALID_RISK_LEVELS = {"none", "safe", "critical", "offensive", "unknown"}
DEFAULT_MARKDOWN_TOP_FINDINGS = 15
DEFAULT_MARKDOWN_TOP_SKILLS = 20


@dataclass(frozen=True)
class Finding:
    severity: str
    code: str
    message: str

    def to_dict(self) -> dict[str, str]:
        return {
            "severity": self.severity,
            "code": self.code,
            "message": self.message,
        }


def has_examples(content: str) -> bool:
    return bool(FENCED_CODE_BLOCK_PATTERN.search(content)) or any(
        pattern.search(content) for pattern in EXAMPLES_HEADING_PATTERNS
    )


def has_limitations(content: str) -> bool:
    return any(pattern.search(content) for pattern in LIMITATIONS_HEADING_PATTERNS)


def find_dangling_links(content: str, skill_root: Path) -> list[str]:
    broken_links: list[str] = []
    for link in MARKDOWN_LINK_PATTERN.findall(content):
        link_clean = link.split("#", 1)[0].strip()
        if not link_clean or link_clean.startswith(("http://", "https://", "mailto:", "<", ">")):
            continue
        if os.path.isabs(link_clean):
            continue

        target_path = (skill_root / link_clean).resolve()
        if not target_path.exists():
            broken_links.append(link)
    return broken_links


def build_skill_report(skill_root: Path, skills_dir: Path) -> dict[str, object]:
    skill_file = skill_root / "SKILL.md"
    rel_dir = skill_root.relative_to(skills_dir).as_posix()
    rel_file = f"{rel_dir}/SKILL.md"
    findings: list[Finding] = []

    if skill_file.is_symlink():
        findings.append(
            Finding(
                "warning",
                "symlinked_skill_markdown",
                "SKILL.md is a symlink and was not audited for safety or usability.",
            )
        )
        return finalize_skill_report(rel_dir, rel_file, findings)

    try:
        content = skill_file.read_text(encoding="utf-8")
    except Exception as exc:  # pragma: no cover - defensive guard
        findings.append(Finding("error", "unreadable_file", f"Unable to read SKILL.md: {exc}"))
        return finalize_skill_report(rel_dir, rel_file, findings)

    metadata, fm_errors = parse_frontmatter(content, rel_file)
    if metadata is None:
        findings.append(Finding("error", "invalid_frontmatter", "Missing or malformed YAML frontmatter."))
        return finalize_skill_report(rel_dir, rel_file, findings)

    for error in fm_errors:
        findings.append(Finding("error", "invalid_frontmatter", error))

    name = metadata.get("name")
    description = metadata.get("description")
    risk = metadata.get("risk")
    source = metadata.get("source")
    date_added = metadata.get("date_added")

    if name != skill_root.name:
        findings.append(
            Finding(
                "error",
                "name_mismatch",
                f"Frontmatter name '{name}' does not match folder name '{skill_root.name}'.",
            )
        )

    if description is None:
        findings.append(Finding("error", "missing_description", "Missing frontmatter description."))
    elif not isinstance(description, str):
        findings.append(
            Finding(
                "error",
                "invalid_description_type",
                f"Description must be a string, got {type(description).__name__}.",
            )
        )
    else:
        stripped_description = description.strip()
        if not stripped_description:
            findings.append(Finding("error", "empty_description", "Description is empty or whitespace only."))
        if len(description) > 300:
            findings.append(
                Finding(
                    "error",
                    "description_too_long",
                    f"Description is {len(description)} characters long; keep it concise.",
                )
            )
        if ELLIPSIS_PATTERN.search(stripped_description):
            findings.append(
                Finding(
                    "warning",
                    "description_truncated",
                    "Description ends with an ellipsis and likely needs cleanup for issue #365.",
                )
            )

    if risk is None:
        findings.append(Finding("warning", "missing_risk", "Missing risk classification."))
    elif risk not in VALID_RISK_LEVELS:
        findings.append(
            Finding(
                "error",
                "invalid_risk",
                f"Risk must be one of {sorted(VALID_RISK_LEVELS)}, got '{risk}'.",
            )
        )

    if source is None:
        findings.append(Finding("warning", "missing_source", "Missing source attribution."))

    if date_added is not None and not DATE_PATTERN.match(str(date_added)):
        findings.append(
            Finding(
                "error",
                "invalid_date_added",
                f"date_added must use YYYY-MM-DD format, got '{date_added}'.",
            )
        )

    if not has_when_to_use_section(content):
        findings.append(Finding("warning", "missing_when_to_use", "Missing a recognized 'When to Use' section."))

    if not has_examples(content):
        findings.append(Finding("warning", "missing_examples", "Missing an example section or fenced example block."))

    if not has_limitations(content):
        findings.append(Finding("warning", "missing_limitations", "Missing a limitations/constraints section."))

    line_count = content.count("\n") + 1
    if line_count > 500:
        findings.append(
            Finding(
                "warning",
                "skill_too_long",
                f"SKILL.md is {line_count} lines long; consider splitting into references/.",
            )
        )

    for broken_link in find_dangling_links(content, skill_root):
        findings.append(
            Finding(
                "error",
                "dangling_link",
                f"Broken relative markdown link: {broken_link}",
            )
        )

    if risk == "offensive" and not SECURITY_DISCLAIMER_PATTERN.search(content):
        findings.append(
            Finding(
                "error",
                "missing_authorized_use_only",
                "Offensive skill is missing the required 'AUTHORIZED USE ONLY' disclaimer.",
            )
        )

    return finalize_skill_report(rel_dir, rel_file, findings)


def finalize_skill_report(skill_id: str, rel_file: str, findings: list[Finding]) -> dict[str, object]:
    severity_counts = Counter(finding.severity for finding in findings)
    if severity_counts["error"] > 0:
        status = "error"
    elif severity_counts["warning"] > 0:
        status = "warning"
    else:
        status = "ok"

    return {
        "id": skill_id,
        "path": rel_file,
        "status": status,
        "error_count": severity_counts["error"],
        "warning_count": severity_counts["warning"],
        "info_count": severity_counts["info"],
        "findings": [finding.to_dict() for finding in findings],
    }


def audit_skills(skills_dir: str | Path) -> dict[str, object]:
    configure_utf8_output()

    skills_root = Path(skills_dir).resolve()
    reports: list[dict[str, object]] = []

    for root, dirs, files in os.walk(skills_root):
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue
        reports.append(build_skill_report(Path(root), skills_root))

    reports.sort(key=lambda report: str(report["id"]).lower())

    code_counts = Counter()
    severity_counts = Counter()
    for report in reports:
        for finding in report["findings"]:
            code_counts[finding["code"]] += 1
            severity_counts[finding["severity"]] += 1

    summary = {
        "skills_scanned": len(reports),
        "skills_ok": sum(report["status"] == "ok" for report in reports),
        "skills_with_errors": sum(report["status"] == "error" for report in reports),
        "skills_with_warnings_only": sum(report["status"] == "warning" for report in reports),
        "errors": severity_counts["error"],
        "warnings": severity_counts["warning"],
        "infos": severity_counts["info"],
        "top_finding_codes": [
            {"code": code, "count": count}
            for code, count in code_counts.most_common()
        ],
    }

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "skills_dir": str(skills_root),
        "summary": summary,
        "skills": reports,
    }


def write_markdown_report(report: dict[str, object], destination: str | Path) -> None:
    summary = report["summary"]
    skills = report["skills"]
    top_findings = summary["top_finding_codes"][:DEFAULT_MARKDOWN_TOP_FINDINGS]
    top_skills = [
        skill for skill in skills if skill["status"] != "ok"
    ][:DEFAULT_MARKDOWN_TOP_SKILLS]

    lines = [
        "# Skills Audit Report",
        "",
        f"Generated at: `{report['generated_at']}`",
        "",
        "## Summary",
        "",
        f"- Skills scanned: **{summary['skills_scanned']}**",
        f"- Skills ready: **{summary['skills_ok']}**",
        f"- Skills with errors: **{summary['skills_with_errors']}**",
        f"- Skills with warnings only: **{summary['skills_with_warnings_only']}**",
        f"- Total errors: **{summary['errors']}**",
        f"- Total warnings: **{summary['warnings']}**",
        "",
        "## Top Finding Codes",
        "",
        "| Code | Count |",
        "| --- | ---: |",
    ]

    if top_findings:
        lines.extend(f"| `{item['code']}` | {item['count']} |" for item in top_findings)
    else:
        lines.append("| _none_ | 0 |")

    lines.extend(
        [
            "",
            "## Skills Needing Attention",
            "",
            "| Skill | Status | Errors | Warnings |",
            "| --- | --- | ---: | ---: |",
        ]
    )

    if top_skills:
        lines.extend(
            f"| `{skill['id']}` | {skill['status']} | {skill['error_count']} | {skill['warning_count']} |"
            for skill in top_skills
        )
    else:
        lines.append("| _none_ | ok | 0 | 0 |")

    Path(destination).write_text("\n".join(lines) + "\n", encoding="utf-8")


def print_summary(report: dict[str, object]) -> None:
    summary = report["summary"]
    print("🔎 Skills audit completed")
    print(f" Skills scanned: {summary['skills_scanned']}")
    print(f" Ready: {summary['skills_ok']}")
    print(f" Warning only: {summary['skills_with_warnings_only']}")
    print(f" With errors: {summary['skills_with_errors']}")
    print(f" Total warnings: {summary['warnings']}")
    print(f" Total errors: {summary['errors']}")

    top_findings = summary["top_finding_codes"][:10]
    if top_findings:
        print(" Top findings:")
        for item in top_findings:
            print(f" - {item['code']}: {item['count']}")


def main() -> int:
    parser = argparse.ArgumentParser(description="Audit every SKILL.md for conformance and baseline usability.")
    parser.add_argument(
        "--json-out",
        help="Write the full machine-readable audit report to this path.",
    )
    parser.add_argument(
        "--markdown-out",
        help="Write a concise Markdown summary to this path.",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit with code 1 when warnings are present, not only errors.",
    )
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    report = audit_skills(repo_root / "skills")
    print_summary(report)

    if args.json_out:
        Path(args.json_out).write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
        print(f"📝 Wrote JSON audit report to {args.json_out}")

    if args.markdown_out:
        write_markdown_report(report, args.markdown_out)
        print(f"📝 Wrote Markdown audit report to {args.markdown_out}")

    summary = report["summary"]
    if summary["errors"] > 0:
        return 1
    if args.strict and summary["warnings"] > 0:
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
139 tools/scripts/cleanup_synthetic_skill_sections.py Normal file
@@ -0,0 +1,139 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import os
import re
import subprocess
import sys
from pathlib import Path

from _project_paths import find_repo_root
from fix_missing_skill_sections import (
    build_examples_section,
    build_when_section,
    has_examples,
    has_when_to_use_section,
)
from validate_skills import configure_utf8_output, parse_frontmatter


def get_head_content(repo_root: Path, relative_path: Path) -> str | None:
    result = subprocess.run(
        ["git", "show", f"HEAD:{relative_path.as_posix()}"],
        cwd=repo_root,
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        return None
    return result.stdout


def remove_exact_section(content: str, section_text: str) -> str:
    normalized = content
    escaped = re.escape(section_text.strip())
    patterns = [
        re.compile(rf"\n\n{escaped}\n(?=\n##\s|\n#\s|\Z)", re.DOTALL),
        re.compile(rf"\n{escaped}\n(?=\n##\s|\n#\s|\Z)", re.DOTALL),
    ]
    for pattern in patterns:
        normalized, count = pattern.subn("\n", normalized, count=1)
        if count:
            break
    normalized = re.sub(r"\n{3,}", "\n\n", normalized)
    return normalized.rstrip() + "\n"


def cleanup_skill_file(repo_root: Path, skill_path: Path) -> tuple[bool, list[str]]:
    current_content = skill_path.read_text(encoding="utf-8")
    metadata, _ = parse_frontmatter(current_content, skill_path.as_posix())
    if not metadata:
        return False, []

    description = metadata.get("description")
    if not isinstance(description, str):
        return False, []

    relative_path = skill_path.relative_to(repo_root)
    head_content = get_head_content(repo_root, relative_path)
    if head_content is None:
        return False, []

    skill_name = str(metadata.get("name") or skill_path.parent.name)
    generated_when = build_when_section(skill_name, description)
    generated_examples = build_examples_section(skill_name, description)

    updated = current_content
    changes: list[str] = []

    if generated_when in updated and not has_when_to_use_section(head_content):
        updated = remove_exact_section(updated, generated_when)
        changes.append("removed_synthetic_when_to_use")

    if generated_examples in updated and not has_examples(head_content):
        updated = remove_exact_section(updated, generated_examples)
        changes.append("removed_synthetic_examples")

    if updated != current_content:
        skill_path.write_text(updated, encoding="utf-8")
        return True, changes

    return False, []


def main() -> int:
    configure_utf8_output()

    parser = argparse.ArgumentParser(description="Remove synthetic generic sections previously generated from descriptions.")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing files.")
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"

    modified = 0
    for root, dirs, files in os.walk(skills_dir):
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue

        skill_path = Path(root) / "SKILL.md"
        current_content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(current_content, skill_path.as_posix())
        if not metadata or not isinstance(metadata.get("description"), str):
            continue

        relative_path = skill_path.relative_to(repo_root)
        head_content = get_head_content(repo_root, relative_path)
        if head_content is None:
            continue

        skill_name = str(metadata.get("name") or skill_path.parent.name)
        generated_when = build_when_section(skill_name, metadata["description"])
        generated_examples = build_examples_section(skill_name, metadata["description"])
        changes: list[str] = []
        if generated_when in current_content and not has_when_to_use_section(head_content):
            changes.append("removed_synthetic_when_to_use")
        if generated_examples in current_content and not has_examples(head_content):
            changes.append("removed_synthetic_examples")
        if not changes:
            continue

        if args.dry_run:
            modified += 1
            print(f"FIX {relative_path} [{', '.join(changes)}]")
            continue

        changed, actual_changes = cleanup_skill_file(repo_root, skill_path)
        if changed:
            modified += 1
            print(f"FIX {relative_path} [{', '.join(actual_changes)}]")

    print(f"\nModified: {modified}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
202 tools/scripts/fix_missing_skill_sections.py Normal file
@@ -0,0 +1,202 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import os
import re
import sys
from pathlib import Path

from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, has_when_to_use_section, parse_frontmatter


FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
WHEN_SECTION_VARIANT_PATTERNS = [
    (re.compile(r"^##\s*when to apply\s*$", re.MULTILINE | re.IGNORECASE), "## When to Use"),
    (re.compile(r"^##\s*when to activate\s*$", re.MULTILINE | re.IGNORECASE), "## When to Use"),
    (re.compile(r"^##\s*\d+[.)]?\s*when to use(?: this skill)?\s*$", re.MULTILINE | re.IGNORECASE), "## When to Use"),
    (re.compile(r"^##\s*when to use\s*$", re.MULTILINE | re.IGNORECASE), "## When to Use"),
]
EXAMPLES_HEADING_PATTERN = re.compile(r"^##\s+Example(s)?\b", re.MULTILINE | re.IGNORECASE)
USAGE_HEADING_PATTERN = re.compile(r"^##\s+Usage\b", re.MULTILINE | re.IGNORECASE)
FENCED_CODE_BLOCK_PATTERN = re.compile(r"^```", re.MULTILINE)
MULTISPACE_PATTERN = re.compile(r"\s+")


def has_examples(content: str) -> bool:
    return bool(
        FENCED_CODE_BLOCK_PATTERN.search(content)
        or EXAMPLES_HEADING_PATTERN.search(content)
        or USAGE_HEADING_PATTERN.search(content)
    )


def normalize_whitespace(text: str) -> str:
    return MULTISPACE_PATTERN.sub(" ", text.strip())


def normalize_when_heading_variants(content: str) -> str:
    updated = content
    for pattern, replacement in WHEN_SECTION_VARIANT_PATTERNS:
        updated = pattern.sub(replacement, updated)
    return updated


def normalize_description_for_prompt(description: str) -> str:
    text = normalize_whitespace(description).rstrip(".")
    if text.lower().startswith("this skill should be used when "):
        text = "Use this skill when " + text[len("this skill should be used when "):]
    elif text.lower().startswith("always use this skill when "):
        text = "Use this skill when " + text[len("always use this skill when "):]
    elif text.lower().startswith("use when "):
        text = "Use this skill when " + text[len("use when "):]
    return text


def build_when_section(skill_name: str, description: str) -> str:
    normalized = normalize_description_for_prompt(description)
    lower = normalized.lower()

    if lower.startswith("use this skill when "):
        sentence = normalized[0].upper() + normalized[1:]
    elif lower.startswith("use when "):
        sentence = "Use this skill when " + normalized[len("Use when "):]
    else:
        sentence = f"Use this skill when the task matches this description: {normalized}."

    return "\n".join(
        [
            "## When to Use",
            f"- {sentence}",
        ]
    )


def build_examples_section(skill_name: str, description: str) -> str:
    normalized = normalize_whitespace(description).rstrip(".")
    return "\n".join(
        [
            "## Examples",
            "```text",
            f"Use @{skill_name} for this task: {normalized}.",
            "",
            "Apply the skill to my current work and walk me through the safest next steps,",
            "key checks, and the concrete output I should produce.",
            "```",
        ]
    )


def find_insert_after_intro(content: str) -> int:
    body_start = 0
    match = FRONTMATTER_PATTERN.search(content)
    if match:
        body_start = match.end()

    remainder = content[body_start:]
    section_match = re.search(r"^##\s+", remainder, re.MULTILINE)
    if section_match:
        return body_start + section_match.start()
    return len(content)


def insert_section_after_intro(content: str, section_text: str) -> str:
    insert_at = find_insert_after_intro(content)
    prefix = content[:insert_at].rstrip() + "\n\n"
    suffix = content[insert_at:].lstrip()
    if suffix:
        return prefix + section_text + "\n\n" + suffix
    return prefix + section_text + "\n"


def append_section(content: str, section_text: str) -> str:
    return content.rstrip() + "\n\n" + section_text + "\n"


def update_skill_file(skill_path: Path, *, add_missing: bool = False) -> tuple[bool, list[str]]:
    content = skill_path.read_text(encoding="utf-8")
    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, []

    updated = normalize_when_heading_variants(content)
    changes: list[str] = []
    description = metadata.get("description")
    skill_name = str(metadata.get("name") or skill_path.parent.name)

    if isinstance(description, str):
        if updated != content:
            changes.append("normalized_when_heading")

        if add_missing and not has_when_to_use_section(updated):
            updated = insert_section_after_intro(updated, build_when_section(skill_name, description))
            changes.append("added_when_to_use")

        if add_missing and not has_examples(updated):
            updated = append_section(updated, build_examples_section(skill_name, description))
            changes.append("added_examples")

    if updated != content:
        skill_path.write_text(updated, encoding="utf-8")
        return True, changes

    return False, changes


def main() -> int:
    configure_utf8_output()

    parser = argparse.ArgumentParser(description="Normalize skill section headings and optionally add missing sections.")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing files.")
    parser.add_argument(
        "--add-missing",
        action="store_true",
        help="Also synthesize missing 'When to Use' and 'Examples' sections from the description.",
    )
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"

    modified = 0
    for root, dirs, files in os.walk(skills_dir):
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue

        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        if not metadata or not isinstance(metadata.get("description"), str):
            continue

        simulated = normalize_when_heading_variants(content)
        needs_when = args.add_missing and not has_when_to_use_section(simulated)
        needs_examples = args.add_missing and not has_examples(simulated)
        if not needs_when and not needs_examples and simulated == content:
            continue

        if args.dry_run:
            change_labels: list[str] = []
            if simulated != content:
                change_labels.append("normalized_when_heading")
            if needs_when:
                change_labels.append("added_when_to_use")
            if needs_examples:
                change_labels.append("added_examples")
            modified += 1
            print(f"FIX {skill_path.relative_to(repo_root)} [{', '.join(change_labels)}]")
            continue

        changed, changes = update_skill_file(skill_path, add_missing=args.add_missing)
        if changed:
            modified += 1
            print(f"FIX {skill_path.relative_to(repo_root)} [{', '.join(changes)}]")

    print(f"\nModified: {modified}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
250 tools/scripts/fix_truncated_descriptions.py Normal file
@@ -0,0 +1,250 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import os
import re
import sys
from pathlib import Path

from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, parse_frontmatter


ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$")
MAX_DESCRIPTION_LENGTH = 300
MIN_PARAGRAPH_LENGTH = 40
TOP_LEVEL_KEY_PATTERN = re.compile(r"^[A-Za-z0-9_-]+:\s*")
FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
MARKDOWN_DECORATION_PATTERN = re.compile(r"[*_`]+")
HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
MULTISPACE_PATTERN = re.compile(r"\s+")


def strip_frontmatter(content: str) -> str:
    match = FRONTMATTER_PATTERN.search(content)
    if not match:
        return content
    return content[match.end():].lstrip()


def normalize_text(text: str) -> str:
    text = text.strip()
    text = re.sub(r"^\s*>+\s?", "", text)
    text = MARKDOWN_DECORATION_PATTERN.sub("", text)
    text = HTML_TAG_PATTERN.sub("", text)
    text = MULTISPACE_PATTERN.sub(" ", text)
    return text.strip()


def split_candidate_paragraphs(body: str) -> list[str]:
    paragraphs: list[str] = []
    current: list[str] = []
    in_code_block = False

    for raw_line in body.splitlines():
        line = raw_line.rstrip()
        stripped = line.strip()

        if stripped.startswith("```"):
            in_code_block = not in_code_block
            if current:
                paragraphs.append(" ".join(current))
                current = []
            continue

        if in_code_block:
            continue

        if not stripped:
            if current:
                paragraphs.append(" ".join(current))
                current = []
            continue

        if stripped.startswith("#"):
            if current:
                paragraphs.append(" ".join(current))
                current = []
            continue

        if stripped.startswith(("- ", "* ", "|", "1. ", "2. ", "3. ", "4. ", "5. ")):
            if current:
                paragraphs.append(" ".join(current))
                current = []
            continue

        current.append(stripped)

    if current:
        paragraphs.append(" ".join(current))

    return [normalize_text(paragraph) for paragraph in paragraphs if normalize_text(paragraph)]


def is_usable_paragraph(paragraph: str) -> bool:
    lower = paragraph.lower()
    if len(paragraph) < MIN_PARAGRAPH_LENGTH:
        return False
    if lower.startswith(("role:", "works well with:", "capabilities:", "patterns:", "anti-patterns:")):
        return False
    if lower.startswith("this skill is applicable to execute the workflow"):
        return False
    return True


def normalize_for_match(text: str) -> str:
    return re.sub(r"[^a-z0-9]+", "", text.lower())


def pick_candidate(description: str, body: str) -> str | None:
    paragraphs = [paragraph for paragraph in split_candidate_paragraphs(body) if is_usable_paragraph(paragraph)]
    if not paragraphs:
        return None

    desc_prefix = ELLIPSIS_PATTERN.sub("", description).strip()
    normalized_prefix = normalize_for_match(desc_prefix)

    if normalized_prefix:
        for paragraph in paragraphs:
            normalized_paragraph = normalize_for_match(paragraph)
            if normalized_paragraph.startswith(normalized_prefix) or normalized_prefix in normalized_paragraph:
                return paragraph

    return paragraphs[0]


def clamp_description(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str:
    text = normalize_text(text)
    if len(text) <= max_length:
        return text

    sentence_candidates = [". ", "! ", "? "]
    best_split = -1
    for marker in sentence_candidates:
        split = text.rfind(marker, 0, max_length + 1)
        if split > best_split:
            best_split = split

    if best_split != -1:
        return text[: best_split + 1].strip()

    split = text.rfind(" ", 0, max_length + 1)
    if split == -1:
        return text[:max_length].strip()
    return text[:split].strip()


def escape_yaml_string(text: str) -> str:
    return text.replace("\\", "\\\\").replace('"', '\\"')


def replace_description(frontmatter_text: str, new_description: str) -> str:
    lines = frontmatter_text.splitlines()
    replacement = f'description: "{escape_yaml_string(new_description)}"'

    for index, line in enumerate(lines):
        if not re.match(r"^\s*description:\s*", line):
            continue

        current_indent = len(line) - len(line.lstrip(" "))
        end_index = index + 1
        while end_index < len(lines):
            candidate = lines[end_index]
            stripped = candidate.strip()
            candidate_indent = len(candidate) - len(candidate.lstrip(" "))
            if not stripped:
                end_index += 1
                continue
            if candidate_indent <= current_indent and TOP_LEVEL_KEY_PATTERN.match(stripped):
                break
            end_index += 1

        updated = lines[:index] + [replacement] + lines[end_index:]
        return "\n".join(updated)

    raise ValueError("Description field not found in frontmatter.")


def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
    content = skill_path.read_text(encoding="utf-8")
    match = FRONTMATTER_PATTERN.search(content)
    if not match:
        return False, None

    metadata, _ = parse_frontmatter(content, skill_path.as_posix())
    if not metadata:
        return False, None

    description = metadata.get("description")
    if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
        return False, None

    candidate = pick_candidate(description, strip_frontmatter(content))
    if not candidate:
        return False, None

    new_description = clamp_description(candidate)
    if not new_description or new_description == normalize_text(description):
        return False, None

    updated_frontmatter = replace_description(match.group(1), new_description)
    updated_content = f"---\n{updated_frontmatter}\n---{content[match.end():]}"
    if updated_content == content:
        return False, None

    skill_path.write_text(updated_content, encoding="utf-8")
    return True, new_description


def main() -> int:
    configure_utf8_output()

    parser = argparse.ArgumentParser(description="Repair truncated SKILL.md frontmatter descriptions.")
    parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
    args = parser.parse_args()

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"

    fixed = 0
    skipped = 0
    for root, dirs, files in os.walk(skills_dir):
        dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
        if "SKILL.md" not in files:
            continue

        skill_path = Path(root) / "SKILL.md"
        content = skill_path.read_text(encoding="utf-8")
        metadata, _ = parse_frontmatter(content, skill_path.as_posix())
        description = metadata.get("description") if metadata else None
        if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
            continue

        candidate = pick_candidate(description, strip_frontmatter(content))
        if not candidate:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")
            continue

        new_description = clamp_description(candidate)
        if args.dry_run:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}")
            continue

        changed, _ = update_skill_file(skill_path)
        if changed:
            fixed += 1
            print(f"FIX {skill_path.relative_to(repo_root)}")
        else:
            skipped += 1
            print(f"SKIP {skill_path.relative_to(repo_root)}")

    print(f"\nFixed: {fixed}")
    print(f"Skipped: {skipped}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -18,6 +18,10 @@ const LOCAL_TEST_COMMANDS = [
  [path.join(TOOL_TESTS, "workflow_contracts.test.js")],
  [path.join(TOOL_TESTS, "docs_security_content.test.js")],
  [path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_bundle_activation_security.py")],
  [path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_audit_skills.py")],
  [path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_cleanup_synthetic_skill_sections.py")],
  [path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_missing_skill_sections.py")],
  [path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_truncated_descriptions.py")],
  [path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_sync_microsoft_skills_security.py")],
  [path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_validate_skills_headings.py")],
];
142 tools/scripts/tests/test_audit_skills.py Normal file
@@ -0,0 +1,142 @@
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path


REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))


def load_module(relative_path: str, module_name: str):
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module


audit_skills = load_module("tools/scripts/audit_skills.py", "audit_skills")


class AuditSkillsTests(unittest.TestCase):
    def test_audit_marks_complete_skill_as_ok(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            skills_dir = root / "skills"
            skill_dir = skills_dir / "good-skill"
            skill_dir.mkdir(parents=True)

            (skill_dir / "SKILL.md").write_text(
                """---
name: good-skill
description: Useful and complete skill description
risk: safe
source: self
date_added: 2026-03-20
---

# Good Skill

## When to Use
- Use when the user needs a solid example.

## Examples
```bash
echo "hello"
```

## Limitations
- Demo only.
""",
                encoding="utf-8",
            )

            report = audit_skills.audit_skills(skills_dir)

            self.assertEqual(report["summary"]["skills_scanned"], 1)
            self.assertEqual(report["summary"]["skills_ok"], 1)
            self.assertEqual(report["summary"]["warnings"], 0)
            self.assertEqual(report["summary"]["errors"], 0)
            self.assertEqual(report["skills"][0]["status"], "ok")

    def test_audit_flags_truncated_description_and_missing_sections(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            skills_dir = root / "skills"
            skill_dir = skills_dir / "truncated-skill"
            skill_dir.mkdir(parents=True)

            (skill_dir / "SKILL.md").write_text(
                """---
name: truncated-skill
description: This description was cut off...
risk: safe
source: self
---

# Truncated Skill

## When to Use
- Use when reproducing issue 365.
""",
                encoding="utf-8",
            )

            report = audit_skills.audit_skills(skills_dir)
            finding_codes = {finding["code"] for finding in report["skills"][0]["findings"]}

            self.assertEqual(report["skills"][0]["status"], "warning")
            self.assertIn("description_truncated", finding_codes)
            self.assertIn("missing_examples", finding_codes)
            self.assertIn("missing_limitations", finding_codes)

    def test_audit_flags_blocking_errors(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            root = Path(temp_dir)
            skills_dir = root / "skills"
            skill_dir = skills_dir / "offensive-skill"
            skill_dir.mkdir(parents=True)
            (skill_dir / "missing.md").write_text("# missing\n", encoding="utf-8")

            (skill_dir / "SKILL.md").write_text(
                """---
name: offensive-skill
description: Offensive example skill
risk: offensive
source: self
---

# Offensive Skill

## When to Use
- Use only in authorized environments.

## Examples
```bash
cat missing.md
```

See [details](missing-reference.md).

## Limitations
- Example only.
""",
                encoding="utf-8",
            )

            report = audit_skills.audit_skills(skills_dir)
            finding_codes = {finding["code"] for finding in report["skills"][0]["findings"]}

            self.assertEqual(report["skills"][0]["status"], "error")
            self.assertIn("dangling_link", finding_codes)
            self.assertIn("missing_authorized_use_only", finding_codes)


if __name__ == "__main__":
    unittest.main()
162 tools/scripts/tests/test_cleanup_synthetic_skill_sections.py Normal file
@@ -0,0 +1,162 @@
import importlib.util
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path
from unittest import mock


REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))


def load_module(relative_path: str, module_name: str):
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module


cleanup_synthetic_skill_sections = load_module(
    "tools/scripts/cleanup_synthetic_skill_sections.py",
    "cleanup_synthetic_skill_sections",
)
fix_missing_skill_sections = load_module(
    "tools/scripts/fix_missing_skill_sections.py",
    "fix_missing_skill_sections_for_cleanup_tests",
)


class CleanupSyntheticSkillSectionsTests(unittest.TestCase):
    def test_remove_exact_section_preserves_other_content(self):
        content = """---
name: demo
description: Demo description.
---

# Demo

## When to Use
- Use this skill when demo work is needed.

## Examples
```text
Use @demo for this task:
foo
```

## Notes
Keep this section.
"""
        section = """## Examples
```text
Use @demo for this task:
foo
```"""

        updated = cleanup_synthetic_skill_sections.remove_exact_section(content, section)

        self.assertNotIn("## Examples", updated)
        self.assertIn("## Notes", updated)
        self.assertIn("Keep this section.", updated)

    def test_cleanup_skill_file_removes_only_generated_sections_missing_from_head(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            repo_root = Path(temp_dir)
            skill_dir = repo_root / "skills" / "demo"
            skill_dir.mkdir(parents=True)
            skill_path = skill_dir / "SKILL.md"

            description = "Build and distribute Expo development clients locally or via TestFlight."
            generated_when = fix_missing_skill_sections.build_when_section("demo", description)
            generated_examples = fix_missing_skill_sections.build_examples_section("demo", description)

            current_content = f"""---
name: demo
description: {description}
---

# Demo

{generated_when}

{generated_examples}

## Notes
Human-written content.
"""
            skill_path.write_text(current_content, encoding="utf-8")

            head_content = f"""---
name: demo
description: {description}
---

# Demo

## Notes
Human-written content.
"""

            with mock.patch.object(
                cleanup_synthetic_skill_sections,
                "get_head_content",
                return_value=head_content,
            ):
                changed, changes = cleanup_synthetic_skill_sections.cleanup_skill_file(repo_root, skill_path)

            updated = skill_path.read_text(encoding="utf-8")
            self.assertTrue(changed)
            self.assertEqual(
                changes,
                ["removed_synthetic_when_to_use", "removed_synthetic_examples"],
            )
            self.assertNotIn(generated_when, updated)
            self.assertNotIn(generated_examples, updated)
            self.assertIn("## Notes", updated)

    def test_cleanup_skill_file_keeps_real_sections_that_already_existed_in_head(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            repo_root = Path(temp_dir)
            skill_dir = repo_root / "skills" / "demo"
            skill_dir.mkdir(parents=True)
            skill_path = skill_dir / "SKILL.md"

            description = "Build and distribute Expo development clients locally or via TestFlight."
            generated_when = fix_missing_skill_sections.build_when_section("demo", description)
            generated_examples = fix_missing_skill_sections.build_examples_section("demo", description)

            current_content = f"""---
name: demo
description: {description}
---

# Demo

{generated_when}

{generated_examples}
"""
            skill_path.write_text(current_content, encoding="utf-8")

            with mock.patch.object(
                cleanup_synthetic_skill_sections,
                "get_head_content",
                return_value=current_content,
            ):
                changed, changes = cleanup_synthetic_skill_sections.cleanup_skill_file(repo_root, skill_path)

            updated = skill_path.read_text(encoding="utf-8")
            self.assertFalse(changed)
            self.assertEqual(changes, [])
            self.assertEqual(updated, current_content)


if __name__ == "__main__":
    unittest.main()
127 tools/scripts/tests/test_fix_missing_skill_sections.py Normal file
@@ -0,0 +1,127 @@
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path


REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))


def load_module(relative_path: str, module_name: str):
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module


fix_missing_skill_sections = load_module(
    "tools/scripts/fix_missing_skill_sections.py",
    "fix_missing_skill_sections",
)


class FixMissingSkillSectionsTests(unittest.TestCase):
    def test_normalizes_when_heading_variant(self):
        content = """---
name: demo
description: Demo description.
---

# Demo

## When to Activate
Activate this skill when:
- something happens
"""
        updated = fix_missing_skill_sections.normalize_when_heading_variants(content)
        self.assertIn("## When to Use", updated)
        self.assertNotIn("## When to Activate", updated)

    def test_update_skill_file_adds_missing_sections(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_path = Path(temp_dir) / "SKILL.md"
            skill_path.write_text(
                """---
name: demo
description: Structured guide for setting up A/B tests with mandatory gates for hypothesis and metrics.
---

# Demo

Intro paragraph.
""",
                encoding="utf-8",
            )

            changed, changes = fix_missing_skill_sections.update_skill_file(skill_path, add_missing=True)
            updated = skill_path.read_text(encoding="utf-8")

            self.assertTrue(changed)
            self.assertIn("added_when_to_use", changes)
            self.assertIn("added_examples", changes)
            self.assertIn("## When to Use", updated)
            self.assertIn("## Examples", updated)
            self.assertIn("Use @demo for this task:", updated)

    def test_update_skill_file_only_adds_examples_when_when_section_exists(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_path = Path(temp_dir) / "SKILL.md"
            skill_path.write_text(
                """---
name: demo
description: Build and distribute Expo development clients locally or via TestFlight.
---

# Demo

## When to Use
- Use this skill when native Expo changes need a dev client.
""",
                encoding="utf-8",
            )

            changed, changes = fix_missing_skill_sections.update_skill_file(skill_path, add_missing=True)
            updated = skill_path.read_text(encoding="utf-8")

            self.assertTrue(changed)
            self.assertNotIn("added_when_to_use", changes)
            self.assertIn("added_examples", changes)
            self.assertEqual(updated.count("## When to Use"), 1)
            self.assertIn("## Examples", updated)

    def test_update_skill_file_defaults_to_normalization_only(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_path = Path(temp_dir) / "SKILL.md"
            skill_path.write_text(
                """---
name: demo
description: Demo description.
---

# Demo

## When to Activate
Activate this skill when:
- something happens
""",
                encoding="utf-8",
            )

            changed, changes = fix_missing_skill_sections.update_skill_file(skill_path)
            updated = skill_path.read_text(encoding="utf-8")

            self.assertTrue(changed)
            self.assertEqual(changes, ["normalized_when_heading"])
            self.assertIn("## When to Use", updated)
            self.assertNotIn("## Examples", updated)


if __name__ == "__main__":
    unittest.main()
112 tools/scripts/tests/test_fix_truncated_descriptions.py Normal file
@@ -0,0 +1,112 @@
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path


REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))


def load_module(relative_path: str, module_name: str):
    module_path = REPO_ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module


fix_truncated_descriptions = load_module(
    "tools/scripts/fix_truncated_descriptions.py",
    "fix_truncated_descriptions",
)


class FixTruncatedDescriptionsTests(unittest.TestCase):
    def test_pick_candidate_prefers_matching_paragraph(self):
        description = "Master API design principles for resilient services..."
        body = """
# Heading

Master API design principles for resilient services and consistent developer experience.

Another paragraph.
"""
        candidate = fix_truncated_descriptions.pick_candidate(description, body)
        self.assertEqual(
            candidate,
            "Master API design principles for resilient services and consistent developer experience.",
        )

    def test_update_skill_file_rewrites_single_line_description(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_path = Path(temp_dir) / "SKILL.md"
            skill_path.write_text(
                """---
name: demo
description: "This description is truncated..."
risk: safe
source: self
---

# Demo

This skill helps you do something useful in a complete way.
""",
                encoding="utf-8",
            )

            changed, new_description = fix_truncated_descriptions.update_skill_file(skill_path)

            self.assertTrue(changed)
            self.assertEqual(
                new_description,
                "This skill helps you do something useful in a complete way.",
            )
            updated = skill_path.read_text(encoding="utf-8")
            self.assertIn(
                'description: "This skill helps you do something useful in a complete way."',
                updated,
            )

    def test_update_skill_file_rewrites_block_scalar_description(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            skill_path = Path(temp_dir) / "SKILL.md"
            skill_path.write_text(
                """---
name: demo
description: |
  Interact with calendar data and schedule meetings,
  update events, or...
risk: safe
source: self
---

# Demo

Lightweight calendar automation with standalone OAuth authentication and event management commands.
""",
                encoding="utf-8",
            )

            changed, new_description = fix_truncated_descriptions.update_skill_file(skill_path)

            self.assertTrue(changed)
            self.assertEqual(
                new_description,
                "Lightweight calendar automation with standalone OAuth authentication and event management commands.",
            )
            updated = skill_path.read_text(encoding="utf-8")
            self.assertIn(
                'description: "Lightweight calendar automation with standalone OAuth authentication and event management commands."',
                updated,
            )


if __name__ == "__main__":
    unittest.main()