meta(skills): Add skill audit and safe metadata fixes

Add repo-wide auditing and targeted repair scripts for skill metadata.
Fix truncated descriptions automatically, keep heading normalization
conservative, and remove synthetic boilerplate sections that degrade
editorial quality while regenerating repo indexes and catalogs.

Fixes #365
This commit is contained in:
sickn33
2026-03-20 09:05:02 +01:00
parent 93f4448c6a
commit fc3c7ae8a2
544 changed files with 6128 additions and 5290 deletions

View File

@@ -18,6 +18,10 @@ const LOCAL_TEST_COMMANDS = [
[path.join(TOOL_TESTS, "workflow_contracts.test.js")],
[path.join(TOOL_TESTS, "docs_security_content.test.js")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_bundle_activation_security.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_audit_skills.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_cleanup_synthetic_skill_sections.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_missing_skill_sections.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_fix_truncated_descriptions.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_sync_microsoft_skills_security.py")],
[path.join(TOOL_SCRIPTS, "run-python.js"), path.join(TOOL_TESTS, "test_validate_skills_headings.py")],
];

View File

@@ -0,0 +1,142 @@
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))
def load_module(relative_path: str, module_name: str):
module_path = REPO_ROOT / relative_path
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
audit_skills = load_module("tools/scripts/audit_skills.py", "audit_skills")
class AuditSkillsTests(unittest.TestCase):
def test_audit_marks_complete_skill_as_ok(self):
with tempfile.TemporaryDirectory() as temp_dir:
root = Path(temp_dir)
skills_dir = root / "skills"
skill_dir = skills_dir / "good-skill"
skill_dir.mkdir(parents=True)
(skill_dir / "SKILL.md").write_text(
"""---
name: good-skill
description: Useful and complete skill description
risk: safe
source: self
date_added: 2026-03-20
---
# Good Skill
## When to Use
- Use when the user needs a solid example.
## Examples
```bash
echo "hello"
```
## Limitations
- Demo only.
""",
encoding="utf-8",
)
report = audit_skills.audit_skills(skills_dir)
self.assertEqual(report["summary"]["skills_scanned"], 1)
self.assertEqual(report["summary"]["skills_ok"], 1)
self.assertEqual(report["summary"]["warnings"], 0)
self.assertEqual(report["summary"]["errors"], 0)
self.assertEqual(report["skills"][0]["status"], "ok")
def test_audit_flags_truncated_description_and_missing_sections(self):
with tempfile.TemporaryDirectory() as temp_dir:
root = Path(temp_dir)
skills_dir = root / "skills"
skill_dir = skills_dir / "truncated-skill"
skill_dir.mkdir(parents=True)
(skill_dir / "SKILL.md").write_text(
"""---
name: truncated-skill
description: This description was cut off...
risk: safe
source: self
---
# Truncated Skill
## When to Use
- Use when reproducing issue 365.
""",
encoding="utf-8",
)
report = audit_skills.audit_skills(skills_dir)
finding_codes = {finding["code"] for finding in report["skills"][0]["findings"]}
self.assertEqual(report["skills"][0]["status"], "warning")
self.assertIn("description_truncated", finding_codes)
self.assertIn("missing_examples", finding_codes)
self.assertIn("missing_limitations", finding_codes)
def test_audit_flags_blocking_errors(self):
with tempfile.TemporaryDirectory() as temp_dir:
root = Path(temp_dir)
skills_dir = root / "skills"
skill_dir = skills_dir / "offensive-skill"
skill_dir.mkdir(parents=True)
(skill_dir / "missing.md").write_text("# missing\n", encoding="utf-8")
(skill_dir / "SKILL.md").write_text(
"""---
name: offensive-skill
description: Offensive example skill
risk: offensive
source: self
---
# Offensive Skill
## When to Use
- Use only in authorized environments.
## Examples
```bash
cat missing.md
```
See [details](missing-reference.md).
## Limitations
- Example only.
""",
encoding="utf-8",
)
report = audit_skills.audit_skills(skills_dir)
finding_codes = {finding["code"] for finding in report["skills"][0]["findings"]}
self.assertEqual(report["skills"][0]["status"], "error")
self.assertIn("dangling_link", finding_codes)
self.assertIn("missing_authorized_use_only", finding_codes)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,162 @@
import importlib.util
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path
from unittest import mock
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))
def load_module(relative_path: str, module_name: str):
module_path = REPO_ROOT / relative_path
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
cleanup_synthetic_skill_sections = load_module(
"tools/scripts/cleanup_synthetic_skill_sections.py",
"cleanup_synthetic_skill_sections",
)
fix_missing_skill_sections = load_module(
"tools/scripts/fix_missing_skill_sections.py",
"fix_missing_skill_sections_for_cleanup_tests",
)
class CleanupSyntheticSkillSectionsTests(unittest.TestCase):
def test_remove_exact_section_preserves_other_content(self):
content = """---
name: demo
description: Demo description.
---
# Demo
## When to Use
- Use this skill when demo work is needed.
## Examples
```text
Use @demo for this task:
foo
```
## Notes
Keep this section.
"""
section = """## Examples
```text
Use @demo for this task:
foo
```"""
updated = cleanup_synthetic_skill_sections.remove_exact_section(content, section)
self.assertNotIn("## Examples", updated)
self.assertIn("## Notes", updated)
self.assertIn("Keep this section.", updated)
def test_cleanup_skill_file_removes_only_generated_sections_missing_from_head(self):
with tempfile.TemporaryDirectory() as temp_dir:
repo_root = Path(temp_dir)
skill_dir = repo_root / "skills" / "demo"
skill_dir.mkdir(parents=True)
skill_path = skill_dir / "SKILL.md"
description = "Build and distribute Expo development clients locally or via TestFlight."
generated_when = fix_missing_skill_sections.build_when_section("demo", description)
generated_examples = fix_missing_skill_sections.build_examples_section("demo", description)
current_content = f"""---
name: demo
description: {description}
---
# Demo
{generated_when}
{generated_examples}
## Notes
Human-written content.
"""
skill_path.write_text(current_content, encoding="utf-8")
head_content = f"""---
name: demo
description: {description}
---
# Demo
## Notes
Human-written content.
"""
with mock.patch.object(
cleanup_synthetic_skill_sections,
"get_head_content",
return_value=head_content,
):
changed, changes = cleanup_synthetic_skill_sections.cleanup_skill_file(repo_root, skill_path)
updated = skill_path.read_text(encoding="utf-8")
self.assertTrue(changed)
self.assertEqual(
changes,
["removed_synthetic_when_to_use", "removed_synthetic_examples"],
)
self.assertNotIn(generated_when, updated)
self.assertNotIn(generated_examples, updated)
self.assertIn("## Notes", updated)
def test_cleanup_skill_file_keeps_real_sections_that_already_existed_in_head(self):
with tempfile.TemporaryDirectory() as temp_dir:
repo_root = Path(temp_dir)
skill_dir = repo_root / "skills" / "demo"
skill_dir.mkdir(parents=True)
skill_path = skill_dir / "SKILL.md"
description = "Build and distribute Expo development clients locally or via TestFlight."
generated_when = fix_missing_skill_sections.build_when_section("demo", description)
generated_examples = fix_missing_skill_sections.build_examples_section("demo", description)
current_content = f"""---
name: demo
description: {description}
---
# Demo
{generated_when}
{generated_examples}
"""
skill_path.write_text(current_content, encoding="utf-8")
with mock.patch.object(
cleanup_synthetic_skill_sections,
"get_head_content",
return_value=current_content,
):
changed, changes = cleanup_synthetic_skill_sections.cleanup_skill_file(repo_root, skill_path)
updated = skill_path.read_text(encoding="utf-8")
self.assertFalse(changed)
self.assertEqual(changes, [])
self.assertEqual(updated, current_content)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,127 @@
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))
def load_module(relative_path: str, module_name: str):
module_path = REPO_ROOT / relative_path
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
fix_missing_skill_sections = load_module(
"tools/scripts/fix_missing_skill_sections.py",
"fix_missing_skill_sections",
)
class FixMissingSkillSectionsTests(unittest.TestCase):
def test_normalizes_when_heading_variant(self):
content = """---
name: demo
description: Demo description.
---
# Demo
## When to Activate
Activate this skill when:
- something happens
"""
updated = fix_missing_skill_sections.normalize_when_heading_variants(content)
self.assertIn("## When to Use", updated)
self.assertNotIn("## When to Activate", updated)
def test_update_skill_file_adds_missing_sections(self):
with tempfile.TemporaryDirectory() as temp_dir:
skill_path = Path(temp_dir) / "SKILL.md"
skill_path.write_text(
"""---
name: demo
description: Structured guide for setting up A/B tests with mandatory gates for hypothesis and metrics.
---
# Demo
Intro paragraph.
""",
encoding="utf-8",
)
changed, changes = fix_missing_skill_sections.update_skill_file(skill_path, add_missing=True)
updated = skill_path.read_text(encoding="utf-8")
self.assertTrue(changed)
self.assertIn("added_when_to_use", changes)
self.assertIn("added_examples", changes)
self.assertIn("## When to Use", updated)
self.assertIn("## Examples", updated)
self.assertIn("Use @demo for this task:", updated)
def test_update_skill_file_only_adds_examples_when_when_section_exists(self):
with tempfile.TemporaryDirectory() as temp_dir:
skill_path = Path(temp_dir) / "SKILL.md"
skill_path.write_text(
"""---
name: demo
description: Build and distribute Expo development clients locally or via TestFlight.
---
# Demo
## When to Use
- Use this skill when native Expo changes need a dev client.
""",
encoding="utf-8",
)
changed, changes = fix_missing_skill_sections.update_skill_file(skill_path, add_missing=True)
updated = skill_path.read_text(encoding="utf-8")
self.assertTrue(changed)
self.assertNotIn("added_when_to_use", changes)
self.assertIn("added_examples", changes)
self.assertEqual(updated.count("## When to Use"), 1)
self.assertIn("## Examples", updated)
def test_update_skill_file_defaults_to_normalization_only(self):
with tempfile.TemporaryDirectory() as temp_dir:
skill_path = Path(temp_dir) / "SKILL.md"
skill_path.write_text(
"""---
name: demo
description: Demo description.
---
# Demo
## When to Activate
Activate this skill when:
- something happens
""",
encoding="utf-8",
)
changed, changes = fix_missing_skill_sections.update_skill_file(skill_path)
updated = skill_path.read_text(encoding="utf-8")
self.assertTrue(changed)
self.assertEqual(changes, ["normalized_when_heading"])
self.assertIn("## When to Use", updated)
self.assertNotIn("## Examples", updated)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,112 @@
import importlib.util
import sys
import tempfile
import unittest
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parents[3]
TOOLS_SCRIPTS_DIR = REPO_ROOT / "tools" / "scripts"
if str(TOOLS_SCRIPTS_DIR) not in sys.path:
sys.path.insert(0, str(TOOLS_SCRIPTS_DIR))
def load_module(relative_path: str, module_name: str):
module_path = REPO_ROOT / relative_path
spec = importlib.util.spec_from_file_location(module_name, module_path)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
fix_truncated_descriptions = load_module(
"tools/scripts/fix_truncated_descriptions.py",
"fix_truncated_descriptions",
)
class FixTruncatedDescriptionsTests(unittest.TestCase):
def test_pick_candidate_prefers_matching_paragraph(self):
description = "Master API design principles for resilient services..."
body = """
# Heading
Master API design principles for resilient services and consistent developer experience.
Another paragraph.
"""
candidate = fix_truncated_descriptions.pick_candidate(description, body)
self.assertEqual(
candidate,
"Master API design principles for resilient services and consistent developer experience.",
)
def test_update_skill_file_rewrites_single_line_description(self):
with tempfile.TemporaryDirectory() as temp_dir:
skill_path = Path(temp_dir) / "SKILL.md"
skill_path.write_text(
"""---
name: demo
description: "This description is truncated..."
risk: safe
source: self
---
# Demo
This skill helps you do something useful in a complete way.
""",
encoding="utf-8",
)
changed, new_description = fix_truncated_descriptions.update_skill_file(skill_path)
self.assertTrue(changed)
self.assertEqual(
new_description,
"This skill helps you do something useful in a complete way.",
)
updated = skill_path.read_text(encoding="utf-8")
self.assertIn(
'description: "This skill helps you do something useful in a complete way."',
updated,
)
def test_update_skill_file_rewrites_block_scalar_description(self):
with tempfile.TemporaryDirectory() as temp_dir:
skill_path = Path(temp_dir) / "SKILL.md"
skill_path.write_text(
"""---
name: demo
description: |
Interact with calendar data and schedule meetings,
update events, or...
risk: safe
source: self
---
# Demo
Lightweight calendar automation with standalone OAuth authentication and event management commands.
""",
encoding="utf-8",
)
changed, new_description = fix_truncated_descriptions.update_skill_file(skill_path)
self.assertTrue(changed)
self.assertEqual(
new_description,
"Lightweight calendar automation with standalone OAuth authentication and event management commands.",
)
updated = skill_path.read_text(encoding="utf-8")
self.assertIn(
'description: "Lightweight calendar automation with standalone OAuth authentication and event management commands."',
updated,
)
if __name__ == "__main__":
unittest.main()