fix: harden registry tooling, make tests hermetic, and restore metadata consistency (#168)

* chore: upgrade maintenance scripts to robust PyYAML parsing

- Replaces fragile regex frontmatter parsing with PyYAML/yaml library
- Ensures multi-line descriptions and complex characters are handled safely
- Normalizes quoting and field ordering across all maintenance scripts
- Updates validator to strictly enforce description quality

* fix: restore and refine truncated skill descriptions

- Recovered 223+ truncated descriptions from git history (6.5.0 regression)
- Refined long descriptions into concise, complete sentences (<200 chars)
- Added missing descriptions for brainstorming and orchestration skills
- Manually fixed imagen skill description
- Resolved dangling links in competitor-alternatives skill

* chore: sync generated registry files and document fixes

- Regenerated skills index with normalized forward-slash paths
- Updated README and CATALOG to reflect restored descriptions
- Documented restoration and script improvements in CHANGELOG.md

* fix: restore missing skill and align metadata for full 955 count

- Renamed SKILL.MD to SKILL.md in andruia-skill-smith to ensure indexing
- Fixed risk level and missing section in andruia-skill-smith
- Synchronized all registry files for final 955 skill count

* chore(scripts): add cross-platform runners and hermetic test orchestration

* fix(scripts): harden utf-8 output and clone target writeability

* fix(skills): add missing date metadata for strict validation

* chore(index): sync generated metadata dates

* fix(catalog): normalize skill paths to prevent CI drift

* chore: sync generated registry files

* fix: enforce LF line endings for generated registry files
This commit is contained in:
Ares
2026-03-01 08:38:25 +00:00
committed by GitHub
parent c9a76a2d94
commit 4a5f1234bb
258 changed files with 4296 additions and 1809 deletions

View File

@@ -128,8 +128,10 @@ def categorize_skill(skill_name, description):
return None
import yaml
def auto_categorize(skills_dir, dry_run=False):
"""Auto-categorize skills and update generate_index.py"""
"""Auto-categorize skills and update SKILL.md files"""
skills = []
categorized_count = 0
already_categorized = 0
@@ -146,17 +148,19 @@ def auto_categorize(skills_dir, dry_run=False):
with open(skill_path, 'r', encoding='utf-8') as f:
content = f.read()
# Extract name and description from frontmatter
# Extract frontmatter and body
fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
if not fm_match:
continue
fm_text = fm_match.group(1)
metadata = {}
for line in fm_text.split('\n'):
if ':' in line and not line.strip().startswith('#'):
key, val = line.split(':', 1)
metadata[key.strip()] = val.strip().strip('"').strip("'")
body = content[fm_match.end():]
try:
metadata = yaml.safe_load(fm_text) or {}
except yaml.YAMLError as e:
print(f"⚠️ {skill_id}: YAML error - {e}")
continue
skill_name = metadata.get('name', skill_id)
description = metadata.get('description', '')
@@ -186,32 +190,12 @@ def auto_categorize(skills_dir, dry_run=False):
})
if not dry_run:
# Update the SKILL.md file - add or replace category
fm_start = content.find('---')
fm_end = content.find('---', fm_start + 3)
metadata['category'] = new_category
new_fm = yaml.dump(metadata, sort_keys=False, allow_unicode=True, width=1000).strip()
new_content = f"---\n{new_fm}\n---" + body
if fm_start >= 0 and fm_end > fm_start:
frontmatter = content[fm_start:fm_end+3]
body = content[fm_end+3:]
# Check if category exists in frontmatter
if 'category:' in frontmatter:
# Replace existing category
new_frontmatter = re.sub(
r'category:\s*\w+',
f'category: {new_category}',
frontmatter
)
else:
# Add category before the closing ---
new_frontmatter = frontmatter.replace(
'\n---',
f'\ncategory: {new_category}\n---'
)
new_content = new_frontmatter + body
with open(skill_path, 'w', encoding='utf-8') as f:
f.write(new_content)
with open(skill_path, 'w', encoding='utf-8') as f:
f.write(new_content)
categorized_count += 1
else:

View File

@@ -628,7 +628,8 @@ function buildCatalog() {
category,
tags,
triggers,
path: path.relative(ROOT, skill.path),
// Normalize separators for deterministic cross-platform output.
path: path.relative(ROOT, skill.path).split(path.sep).join("/"),
});
}

71
scripts/copy-file.js Normal file
View File

@@ -0,0 +1,71 @@
#!/usr/bin/env node
'use strict';

// Safely copy a single file within the project root.
//
// Usage: node scripts/copy-file.js <source> <destination>
//
// Both paths are resolved against the repository root and must stay inside
// it; the destination's parent directory must already exist. Any failure is
// reported on stderr and exits with status 1.

const fs = require('node:fs');
const path = require('node:path');

const args = process.argv.slice(2);
if (args.length !== 2) {
  console.error('Usage: node scripts/copy-file.js <source> <destination>');
  process.exit(1);
}

const [sourceInput, destinationInput] = args;
const projectRoot = path.resolve(__dirname, '..');
const sourcePath = path.resolve(projectRoot, sourceInput);
const destinationPath = path.resolve(projectRoot, destinationInput);
const destinationDir = path.dirname(destinationPath);

// Print an error message and abort with a non-zero exit code.
function fail(message) {
  console.error(message);
  process.exit(1);
}

// A path is inside the project root iff the relative path from the root
// neither climbs upward ('..') nor resolves to an absolute path.
function isInsideProjectRoot(targetPath) {
  const relativePath = path.relative(projectRoot, targetPath);
  return relativePath === '' || (!relativePath.startsWith('..') && !path.isAbsolute(relativePath));
}

if (!isInsideProjectRoot(sourcePath) || !isInsideProjectRoot(destinationPath)) {
  fail('Source and destination must resolve inside the project root.');
}
if (sourcePath === destinationPath) {
  fail('Source and destination must be different files.');
}

// Stat once instead of existsSync followed by statSync: this removes the
// check-then-use race and still distinguishes "missing" from other I/O
// failures via error.code.
let sourceStats;
try {
  sourceStats = fs.statSync(sourcePath);
} catch (error) {
  if (error.code === 'ENOENT') {
    fail(`Source file not found: ${sourceInput}`);
  }
  fail(`Unable to read source file "${sourceInput}": ${error.message}`);
}
if (!sourceStats.isFile()) {
  fail(`Source is not a file: ${sourceInput}`);
}

let destinationDirStats;
try {
  destinationDirStats = fs.statSync(destinationDir);
} catch {
  fail(`Destination directory not found: ${path.relative(projectRoot, destinationDir)}`);
}
if (!destinationDirStats.isDirectory()) {
  fail(`Destination parent is not a directory: ${path.relative(projectRoot, destinationDir)}`);
}

try {
  fs.copyFileSync(sourcePath, destinationPath);
} catch (error) {
  fail(`Copy failed (${sourceInput} -> ${destinationInput}): ${error.message}`);
}

console.log(`Copied ${sourceInput} -> ${destinationInput}`);

View File

@@ -1,5 +1,6 @@
import os
import re
import yaml
def fix_skills(skills_dir):
for root, dirs, files in os.walk(skills_dir):
@@ -14,33 +15,31 @@ def fix_skills(skills_dir):
continue
fm_text = fm_match.group(1)
body = content[fm_match.end():]
folder_name = os.path.basename(root)
new_fm_lines = []
try:
metadata = yaml.safe_load(fm_text) or {}
except yaml.YAMLError as e:
print(f"⚠️ {skill_path}: YAML error - {e}")
continue
changed = False
for line in fm_text.split('\n'):
if line.startswith('name:'):
old_name = line.split(':', 1)[1].strip().strip('"').strip("'")
if old_name != folder_name:
new_fm_lines.append(f"name: {folder_name}")
changed = True
else:
new_fm_lines.append(line)
elif line.startswith('description:'):
desc = line.split(':', 1)[1].strip().strip('"').strip("'")
if len(desc) > 200:
# trim to 197 chars and add "..."
short_desc = desc[:197] + "..."
new_fm_lines.append(f'description: "{short_desc}"')
changed = True
else:
new_fm_lines.append(line)
else:
new_fm_lines.append(line)
# 1. Fix Name
if metadata.get('name') != folder_name:
metadata['name'] = folder_name
changed = True
# 2. Fix Description length
desc = metadata.get('description', '')
if isinstance(desc, str) and len(desc) > 200:
metadata['description'] = desc[:197] + "..."
changed = True
if changed:
new_fm_text = '\n'.join(new_fm_lines)
new_content = content[:fm_match.start(1)] + new_fm_text + content[fm_match.end(1):]
new_fm = yaml.dump(metadata, sort_keys=False, allow_unicode=True, width=1000).strip()
new_content = f"---\n{new_fm}\n---" + body
with open(skill_path, 'w', encoding='utf-8') as f:
f.write(new_content)
print(f"Fixed {skill_path}")

View File

@@ -1,9 +1,9 @@
import os
import re
import json
import yaml
def fix_yaml_quotes(skills_dir):
print(f"Scanning for YAML quoting errors in {skills_dir}...")
print(f"Normalizing YAML frontmatter in {skills_dir}...")
fixed_count = 0
for root, dirs, files in os.walk(skills_dir):
@@ -21,42 +21,24 @@ def fix_yaml_quotes(skills_dir):
continue
fm_text = fm_match.group(1)
new_fm_lines = []
changed = False
body = content[fm_match.end():]
for line in fm_text.split('\n'):
if line.startswith('description:'):
key, val = line.split(':', 1)
val = val.strip()
# Store original to check if it matches the fixed version
orig_val = val
# Strip matching outer quotes if they exist
if val.startswith('"') and val.endswith('"') and len(val) >= 2:
val = val[1:-1]
elif val.startswith("'") and val.endswith("'") and len(val) >= 2:
val = val[1:-1]
# Now safely encode using JSON to handle internal escapes
safe_val = json.dumps(val)
if safe_val != orig_val:
new_line = f"description: {safe_val}"
new_fm_lines.append(new_line)
changed = True
continue
new_fm_lines.append(line)
try:
# safe_load and then dump will normalize quoting automatically
metadata = yaml.safe_load(fm_text) or {}
new_fm = yaml.dump(metadata, sort_keys=False, allow_unicode=True, width=1000).strip()
if changed:
new_fm_text = '\n'.join(new_fm_lines)
new_content = content[:fm_match.start(1)] + new_fm_text + content[fm_match.end(1):]
with open(file_path, 'w', encoding='utf-8') as f:
f.write(new_content)
print(f"Fixed quotes in {os.path.relpath(file_path, skills_dir)}")
fixed_count += 1
# Check if it actually changed something significant (beyond just style)
# but normalization is good anyway. We'll just compare the fm_text.
if new_fm.strip() != fm_text.strip():
new_content = f"---\n{new_fm}\n---" + body
with open(file_path, 'w', encoding='utf-8') as f:
f.write(new_content)
fixed_count += 1
except yaml.YAMLError as e:
print(f"⚠️ {file_path}: YAML error - {e}")
print(f"Total files fixed: {fixed_count}")
print(f"Total files normalized: {fixed_count}")
if __name__ == '__main__':
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

View File

@@ -59,9 +59,11 @@ def generate_index(skills_dir, output_file):
parent_dir = os.path.basename(os.path.dirname(root))
# Default values
rel_path = os.path.relpath(root, os.path.dirname(skills_dir))
# Force forward slashes for cross-platform JSON compatibility
skill_info = {
"id": dir_name,
"path": os.path.relpath(root, os.path.dirname(skills_dir)),
"path": rel_path.replace(os.sep, '/'),
"category": parent_dir if parent_dir != "skills" else None, # Will be overridden by frontmatter if present
"name": dir_name.replace("-", " ").title(),
"description": "",
@@ -117,7 +119,7 @@ def generate_index(skills_dir, output_file):
# Sort validation: by name
skills.sort(key=lambda x: (x["name"].lower(), x["id"].lower()))
with open(output_file, 'w', encoding='utf-8') as f:
with open(output_file, 'w', encoding='utf-8', newline='\n') as f:
json.dump(skills, f, indent=2)
print(f"✅ Generated rich index with {len(skills)} skills at: {output_file}")

View File

@@ -18,20 +18,19 @@ def get_project_root():
"""Get the project root directory."""
return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
import yaml
def parse_frontmatter(content):
"""Parse frontmatter from SKILL.md content."""
"""Parse frontmatter from SKILL.md content using PyYAML."""
fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
if not fm_match:
return None
fm_text = fm_match.group(1)
metadata = {}
for line in fm_text.split('\n'):
if ':' in line and not line.strip().startswith('#'):
key, val = line.split(':', 1)
metadata[key.strip()] = val.strip().strip('"').strip("'")
return metadata
try:
return yaml.safe_load(fm_text) or {}
except yaml.YAMLError:
return None
def generate_skills_report(output_file=None, sort_by='date'):
"""Generate a report of all skills with their metadata."""

View File

@@ -26,45 +26,39 @@ def get_project_root():
"""Get the project root directory."""
return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
import yaml
def parse_frontmatter(content):
"""Parse frontmatter from SKILL.md content."""
"""Parse frontmatter from SKILL.md content using PyYAML."""
fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
if not fm_match:
return None, content
fm_text = fm_match.group(1)
metadata = {}
for line in fm_text.split('\n'):
if ':' in line and not line.strip().startswith('#'):
key, val = line.split(':', 1)
metadata[key.strip()] = val.strip().strip('"').strip("'")
return metadata, content
try:
metadata = yaml.safe_load(fm_text) or {}
return metadata, content
except yaml.YAMLError as e:
print(f"⚠️ YAML parsing error: {e}")
return None, content
def reconstruct_frontmatter(metadata):
"""Reconstruct frontmatter from metadata dict."""
lines = ["---"]
# Order: id, name, description, category, risk, source, tags, date_added
priority_keys = ['id', 'name', 'description', 'category', 'risk', 'source', 'tags']
"""Reconstruct frontmatter from metadata dict using PyYAML."""
# Ensure important keys are at the top if they exist
ordered = {}
priority_keys = ['id', 'name', 'description', 'category', 'risk', 'source', 'tags', 'date_added']
for key in priority_keys:
if key in metadata:
val = metadata[key]
if isinstance(val, list):
# Handle list fields like tags
lines.append(f'{key}: {val}')
elif ' ' in str(val) or any(c in str(val) for c in ':#"'):
lines.append(f'{key}: "{val}"')
else:
lines.append(f'{key}: {val}')
ordered[key] = metadata[key]
# Add date_added at the end
if 'date_added' in metadata:
lines.append(f'date_added: "{metadata["date_added"]}"')
lines.append("---")
return '\n'.join(lines)
# Add any remaining keys
for key, value in metadata.items():
if key not in ordered:
ordered[key] = value
fm_text = yaml.dump(ordered, sort_keys=False, allow_unicode=True, width=1000).strip()
return f"---\n{fm_text}\n---"
def update_skill_frontmatter(skill_path, metadata):
"""Update a skill's frontmatter with new metadata."""

View File

@@ -14,6 +14,9 @@ const ALLOWED_FIELDS = new Set([
'compatibility',
'metadata',
'allowed-tools',
'date_added',
'category',
'id',
]);
function isPlainObject(value) {
@@ -122,7 +125,8 @@ function normalizeSkill(skillId) {
if (!modified) return false;
const ordered = {};
for (const key of ['name', 'description', 'license', 'compatibility', 'allowed-tools', 'metadata']) {
const order = ['id', 'name', 'description', 'category', 'risk', 'source', 'license', 'compatibility', 'date_added', 'allowed-tools', 'metadata'];
for (const key of order) {
if (updated[key] !== undefined) {
ordered[key] = updated[key];
}

90
scripts/run-python.js Normal file
View File

@@ -0,0 +1,90 @@
#!/usr/bin/env node
'use strict';

// Cross-platform Python launcher: locates a working Python 3 interpreter and
// re-executes it with the arguments passed to this script.
const { spawn, spawnSync } = require('node:child_process');

// Everything after "node scripts/run-python.js" is forwarded verbatim to the
// selected interpreter; at minimum the target .py script is required.
const args = process.argv.slice(2);
if (args.length === 0) {
  console.error('Usage: node scripts/run-python.js <script.py> [args...]');
  process.exit(1);
}
// De-duplicate candidate argv arrays, keeping the first occurrence of each.
function uniqueCandidates(candidates) {
  const byKey = new Map();
  for (const candidate of candidates) {
    // NUL cannot appear in an argv entry, so it is a safe join delimiter.
    const key = candidate.join('\u0000');
    if (!byKey.has(key)) {
      byKey.set(key, candidate);
    }
  }
  return [...byKey.values()];
}
// Build the ordered interpreter probe list. An explicit override
// (ANTIGRAVITY_PYTHON, or npm's python config) is tried first, followed by
// the conventional launcher names; duplicates are collapsed, first wins.
function getPythonCandidates() {
  // Optional override for CI/local pinning without editing scripts.
  const configuredPython =
    process.env.ANTIGRAVITY_PYTHON || process.env.npm_config_python;
  const candidates = [];
  if (configuredPython) {
    candidates.push([configuredPython]);
  }
  // Keep this ordered list easy to update if project requirements change.
  candidates.push(['python3'], ['python'], ['py', '-3']);
  return uniqueCandidates(candidates);
}
// Probe a candidate interpreter; true only when it launches and reports
// itself as Python 3 (the probe exits 0 iff sys.version_info[0] == 3).
function canRun(candidate) {
  const [command, ...prefixArgs] = candidate;
  const versionProbe =
    'import sys; raise SystemExit(0 if sys.version_info[0] == 3 else 1)';
  const probe = spawnSync(command, [...prefixArgs, '-c', versionProbe], {
    stdio: 'ignore',
    shell: false,
  });
  if (probe.error != null) {
    // Spawn failure (e.g. ENOENT): the command does not exist or cannot run.
    return false;
  }
  return probe.status === 0;
}
// Resolve the first working interpreter, then delegate with inherited stdio.
const pythonCandidates = getPythonCandidates();
const selected = pythonCandidates.find(canRun);
if (!selected) {
  // Report exactly what was probed (including any configured override)
  // instead of a hard-coded list that can drift from getPythonCandidates().
  const tried = pythonCandidates.map((candidate) => candidate.join(' ')).join(', ');
  console.error(`Unable to find a Python 3 interpreter. Tried: ${tried}`);
  process.exit(1);
}

const [command, ...baseArgs] = selected;
const child = spawn(command, [...baseArgs, ...args], {
  stdio: 'inherit',
  shell: false,
});

child.on('error', (error) => {
  console.error(`Failed to start Python interpreter "${command}": ${error.message}`);
  process.exit(1);
});

child.on('exit', (code, signal) => {
  if (signal) {
    // Mirror a signal-terminated child by re-raising the same signal on
    // ourselves so callers observe the conventional terminated-by-signal status.
    try {
      process.kill(process.pid, signal);
    } catch {
      process.exit(1);
    }
    return;
  }
  process.exit(code ?? 1);
});

View File

@@ -59,8 +59,10 @@ def cleanup_previous_sync():
return removed_count
import yaml
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
"""Extract the 'name' field from SKILL.md YAML frontmatter using PyYAML."""
try:
content = skill_md_path.read_text(encoding="utf-8")
except Exception:
@@ -70,13 +72,11 @@ def extract_skill_name(skill_md_path: Path) -> str | None:
if not fm_match:
return None
for line in fm_match.group(1).splitlines():
match = re.match(r"^name:\s*(.+)$", line)
if match:
value = match.group(1).strip().strip("\"'")
if value:
return value
return None
try:
data = yaml.safe_load(fm_match.group(1)) or {}
return data.get('name')
except Exception:
return None
def generate_fallback_name(relative_path: Path) -> str:

View File

@@ -5,13 +5,61 @@ Shows the repository layout, skill locations, and what flat names would be gener
"""
import re
import io
import shutil
import subprocess
import sys
import tempfile
import traceback
import uuid
from pathlib import Path
MS_REPO = "https://github.com/microsoft/skills.git"
def create_clone_target(prefix: str) -> Path:
"""Return a writable, non-existent path for git clone destination."""
repo_tmp_root = Path(__file__).resolve().parents[2] / ".tmp" / "tests"
candidate_roots = (repo_tmp_root, Path(tempfile.gettempdir()))
last_error: OSError | None = None
for root in candidate_roots:
try:
root.mkdir(parents=True, exist_ok=True)
probe_file = root / f".{prefix}write-probe-{uuid.uuid4().hex}.tmp"
with probe_file.open("xb"):
pass
probe_file.unlink()
return root / f"{prefix}{uuid.uuid4().hex}"
except OSError as exc:
last_error = exc
if last_error is not None:
raise last_error
raise OSError("Unable to determine clone destination")
def configure_utf8_output() -> None:
"""Best-effort UTF-8 stdout/stderr on Windows without dropping diagnostics."""
for stream_name in ("stdout", "stderr"):
stream = getattr(sys, stream_name)
try:
stream.reconfigure(encoding="utf-8", errors="backslashreplace")
continue
except Exception:
pass
buffer = getattr(stream, "buffer", None)
if buffer is not None:
setattr(
sys,
stream_name,
io.TextIOWrapper(
buffer, encoding="utf-8", errors="backslashreplace"
),
)
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
@@ -37,18 +85,26 @@ def inspect_repo():
print("🔍 Inspecting Microsoft Skills Repository Structure")
print("=" * 60)
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
repo_path: Path | None = None
try:
repo_path = create_clone_target(prefix="ms-skills-")
print("\n1⃣ Cloning repository...")
subprocess.run(
["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
check=True,
capture_output=True,
)
try:
subprocess.run(
["git", "clone", "--depth", "1", MS_REPO, str(repo_path)],
check=True,
capture_output=True,
text=True,
)
except subprocess.CalledProcessError as exc:
print("\n❌ git clone failed.", file=sys.stderr)
if exc.stderr:
print(exc.stderr.strip(), file=sys.stderr)
raise
# Find all SKILL.md files
all_skill_mds = list(temp_path.rglob("SKILL.md"))
all_skill_mds = list(repo_path.rglob("SKILL.md"))
print(f"\n2⃣ Total SKILL.md files found: {len(all_skill_mds)}")
# Show flat name mapping
@@ -59,7 +115,7 @@ def inspect_repo():
for skill_md in sorted(all_skill_mds, key=lambda p: str(p)):
try:
rel = skill_md.parent.relative_to(temp_path)
rel = skill_md.parent.relative_to(repo_path)
except ValueError:
rel = skill_md.parent
@@ -87,12 +143,18 @@ def inspect_repo():
f"\n4⃣ ✅ No name collisions — all {len(names_seen)} names are unique!")
print("\n✨ Inspection complete!")
finally:
if repo_path is not None:
shutil.rmtree(repo_path, ignore_errors=True)
if __name__ == "__main__":
configure_utf8_output()
try:
inspect_repo()
except subprocess.CalledProcessError as exc:
sys.exit(exc.returncode or 1)
except Exception as e:
print(f"\n❌ Error: {e}")
import traceback
traceback.print_exc()
print(f"\n❌ Error: {e}", file=sys.stderr)
traceback.print_exc(file=sys.stderr)
sys.exit(1)

View File

@@ -0,0 +1,76 @@
#!/usr/bin/env node
// Hermetic test orchestrator: always runs the local suite; network-dependent
// integration tests run only when explicitly enabled via an env flag.
const { spawnSync } = require("child_process");

// Environment variable that opts in to network integration tests.
const NETWORK_TEST_ENV = "ENABLE_NETWORK_TESTS";
// Accepted truthy spellings for the flag (compared trimmed, lowercased).
const ENABLED_VALUES = new Set(["1", "true", "yes", "on"]);

// Commands (argv arrays run under the current Node binary) safe to run offline.
const LOCAL_TEST_COMMANDS = [
  ["scripts/tests/validate_skills_headings.test.js"],
  ["scripts/run-python.js", "scripts/tests/test_validate_skills_headings.py"],
];
// Commands that clone remote repositories and therefore require network access.
const NETWORK_TEST_COMMANDS = [
  ["scripts/run-python.js", "scripts/tests/inspect_microsoft_repo.py"],
  ["scripts/run-python.js", "scripts/tests/test_comprehensive_coverage.py"],
];
// True when the opt-in env var holds a recognized truthy value.
function isNetworkTestsEnabled() {
  const raw = process.env[NETWORK_TEST_ENV];
  return raw ? ENABLED_VALUES.has(String(raw).trim().toLowerCase()) : false;
}
// Run a script under the current Node binary with output streamed through.
// Throws on spawn failure; on child failure, mirrors the child's fate
// (re-raises its signal, or exits with its status / 1 if unknown).
function runNodeCommand(args) {
  const outcome = spawnSync(process.execPath, args, { stdio: "inherit" });
  if (outcome.error) throw outcome.error;
  if (outcome.signal) process.kill(process.pid, outcome.signal);
  const exitCode = outcome.status;
  if (typeof exitCode !== "number" || exitCode !== 0) {
    process.exit(typeof exitCode === "number" ? exitCode : 1);
  }
}
// Execute each command in order; runNodeCommand aborts the process on failure,
// so reaching the end means every command succeeded.
function runCommandSet(commands) {
  commands.forEach((commandArgs) => runNodeCommand(commandArgs));
}
// Entry point. "--local" / "--network" force one suite; with no mode, the
// local suite always runs and the network suite runs only when enabled.
function main() {
  const mode = process.argv[2];
  switch (mode) {
    case "--local":
      runCommandSet(LOCAL_TEST_COMMANDS);
      return;
    case "--network":
      runCommandSet(NETWORK_TEST_COMMANDS);
      return;
    default:
      break;
  }
  runCommandSet(LOCAL_TEST_COMMANDS);
  if (!isNetworkTestsEnabled()) {
    console.log(
      `[tests] Skipping network integration tests. Set ${NETWORK_TEST_ENV}=1 to enable.`,
    );
    return;
  }
  console.log(`[tests] ${NETWORK_TEST_ENV} enabled; running network integration tests.`);
  runCommandSet(NETWORK_TEST_COMMANDS);
}

main();

View File

@@ -5,14 +5,62 @@ Ensures all skills are captured and no directory name collisions exist.
"""
import re
import io
import shutil
import subprocess
import sys
import tempfile
import traceback
import uuid
from pathlib import Path
from collections import defaultdict
MS_REPO = "https://github.com/microsoft/skills.git"
def create_clone_target(prefix: str) -> Path:
"""Return a writable, non-existent path for git clone destination."""
repo_tmp_root = Path(__file__).resolve().parents[2] / ".tmp" / "tests"
candidate_roots = (repo_tmp_root, Path(tempfile.gettempdir()))
last_error: OSError | None = None
for root in candidate_roots:
try:
root.mkdir(parents=True, exist_ok=True)
probe_file = root / f".{prefix}write-probe-{uuid.uuid4().hex}.tmp"
with probe_file.open("xb"):
pass
probe_file.unlink()
return root / f"{prefix}{uuid.uuid4().hex}"
except OSError as exc:
last_error = exc
if last_error is not None:
raise last_error
raise OSError("Unable to determine clone destination")
def configure_utf8_output() -> None:
"""Best-effort UTF-8 stdout/stderr on Windows without dropping diagnostics."""
for stream_name in ("stdout", "stderr"):
stream = getattr(sys, stream_name)
try:
stream.reconfigure(encoding="utf-8", errors="backslashreplace")
continue
except Exception:
pass
buffer = getattr(stream, "buffer", None)
if buffer is not None:
setattr(
sys,
stream_name,
io.TextIOWrapper(
buffer, encoding="utf-8", errors="backslashreplace"
),
)
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
@@ -41,27 +89,35 @@ def analyze_skill_locations():
print("🔬 Comprehensive Skill Coverage & Uniqueness Analysis")
print("=" * 60)
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
repo_path: Path | None = None
try:
repo_path = create_clone_target(prefix="ms-skills-")
print("\n1⃣ Cloning repository...")
subprocess.run(
["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
check=True,
capture_output=True,
)
try:
subprocess.run(
["git", "clone", "--depth", "1", MS_REPO, str(repo_path)],
check=True,
capture_output=True,
text=True,
)
except subprocess.CalledProcessError as exc:
print("\n❌ git clone failed.", file=sys.stderr)
if exc.stderr:
print(exc.stderr.strip(), file=sys.stderr)
raise
# Find ALL SKILL.md files
all_skill_files = list(temp_path.rglob("SKILL.md"))
all_skill_files = list(repo_path.rglob("SKILL.md"))
print(f"\n2⃣ Total SKILL.md files found: {len(all_skill_files)}")
# Categorize by location
location_types = defaultdict(list)
for skill_file in all_skill_files:
path_str = str(skill_file)
if ".github/skills" in path_str:
path_str = skill_file.as_posix()
if ".github/skills/" in path_str:
location_types["github_skills"].append(skill_file)
elif ".github/plugins" in path_str:
elif ".github/plugins/" in path_str:
location_types["github_plugins"].append(skill_file)
elif "/skills/" in path_str:
location_types["skills_dir"].append(skill_file)
@@ -81,7 +137,7 @@ def analyze_skill_locations():
for skill_file in all_skill_files:
try:
rel = skill_file.parent.relative_to(temp_path)
rel = skill_file.parent.relative_to(repo_path)
except ValueError:
rel = skill_file.parent
@@ -163,9 +219,13 @@ def analyze_skill_locations():
"invalid_names": len(invalid_names),
"passed": is_pass,
}
finally:
if repo_path is not None:
shutil.rmtree(repo_path, ignore_errors=True)
if __name__ == "__main__":
configure_utf8_output()
try:
results = analyze_skill_locations()
@@ -176,14 +236,18 @@ if __name__ == "__main__":
if results["passed"]:
print("\n✅ V4 FLAT STRUCTURE IS VALID")
print(" All names are unique and valid directory names!")
sys.exit(0)
else:
print("\n⚠️ V4 FLAT STRUCTURE NEEDS FIXES")
if results["collisions"] > 0:
print(f" {results['collisions']} name collisions to resolve")
if results["invalid_names"] > 0:
print(f" {results['invalid_names']} invalid directory names")
sys.exit(1)
except subprocess.CalledProcessError as exc:
sys.exit(exc.returncode or 1)
except Exception as e:
print(f"\n❌ Error: {e}")
import traceback
traceback.print_exc()
print(f"\n❌ Error: {e}", file=sys.stderr)
traceback.print_exc(file=sys.stderr)
sys.exit(1)

View File

@@ -1,7 +1,31 @@
#!/usr/bin/env python3
import io
import json
import os
import re
import sys
def configure_utf8_output() -> None:
"""Best-effort UTF-8 stdout/stderr on Windows without dropping diagnostics."""
if sys.platform != "win32":
return
for stream_name in ("stdout", "stderr"):
stream = getattr(sys, stream_name)
try:
stream.reconfigure(encoding="utf-8", errors="backslashreplace")
continue
except Exception:
pass
buffer = getattr(stream, "buffer", None)
if buffer is not None:
setattr(
sys,
stream_name,
io.TextIOWrapper(buffer, encoding="utf-8", errors="backslashreplace"),
)
def update_readme():
@@ -55,11 +79,12 @@ def update_readme():
content,
)
with open(readme_path, "w", encoding="utf-8") as f:
with open(readme_path, "w", encoding="utf-8", newline="\n") as f:
f.write(content)
print("✅ README.md updated successfully.")
if __name__ == "__main__":
configure_utf8_output()
update_readme()

View File

@@ -2,6 +2,29 @@ import os
import re
import argparse
import sys
import io
def configure_utf8_output() -> None:
"""Best-effort UTF-8 stdout/stderr on Windows without dropping diagnostics."""
if sys.platform != "win32":
return
for stream_name in ("stdout", "stderr"):
stream = getattr(sys, stream_name)
try:
stream.reconfigure(encoding="utf-8", errors="backslashreplace")
continue
except Exception:
pass
buffer = getattr(stream, "buffer", None)
if buffer is not None:
setattr(
sys,
stream_name,
io.TextIOWrapper(buffer, encoding="utf-8", errors="backslashreplace"),
)
WHEN_TO_USE_PATTERNS = [
re.compile(r"^##\s+When\s+to\s+Use", re.MULTILINE | re.IGNORECASE),
@@ -12,39 +35,37 @@ WHEN_TO_USE_PATTERNS = [
def has_when_to_use_section(content):
return any(pattern.search(content) for pattern in WHEN_TO_USE_PATTERNS)
import yaml
def parse_frontmatter(content, rel_path=None):
"""
Simple frontmatter parser using regex to avoid external dependencies.
Returns a dict of key-values.
Parse frontmatter using PyYAML for robustness.
Returns a dict of key-values and a list of error messages.
"""
fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
if not fm_match:
return None, []
return None, ["Missing or malformed YAML frontmatter"]
fm_text = fm_match.group(1)
metadata = {}
lines = fm_text.split('\n')
fm_errors = []
for i, line in enumerate(lines):
if ':' in line:
key, val = line.split(':', 1)
metadata[key.strip()] = val.strip().strip('"').strip("'")
# Check for multi-line description issue (problem identification for the user)
if key.strip() == "description":
stripped_val = val.strip()
if (stripped_val.startswith('"') and stripped_val.endswith('"')) or \
(stripped_val.startswith("'") and stripped_val.endswith("'")):
if i + 1 < len(lines) and lines[i+1].startswith(' '):
fm_errors.append(f"description is wrapped in quotes but followed by indented lines. This causes YAML truncation.")
# Check for literal indicators wrapped in quotes
if stripped_val in ['"|"', "'>'", '"|"', "'>'"]:
fm_errors.append(f"description uses a block indicator {stripped_val} inside quotes. Remove quotes for proper YAML block behavior.")
return metadata, fm_errors
try:
metadata = yaml.safe_load(fm_text) or {}
# Identification of the specific regression issue for better reporting
if "description" in metadata:
desc = metadata["description"]
if not desc or (isinstance(desc, str) and not desc.strip()):
fm_errors.append("description field is empty or whitespace only.")
elif desc == "|":
fm_errors.append("description contains only the YAML block indicator '|', likely due to a parsing regression.")
return metadata, fm_errors
except yaml.YAMLError as e:
return None, [f"YAML Syntax Error: {e}"]
def validate_skills(skills_dir, strict_mode=False):
configure_utf8_output()
print(f"🔍 Validating skills in: {skills_dir}")
print(f"⚙️ Mode: {'STRICT (CI)' if strict_mode else 'Standard (Dev)'}")
@@ -90,12 +111,15 @@ def validate_skills(skills_dir, strict_mode=False):
elif metadata["name"] != os.path.basename(root):
errors.append(f"{rel_path}: Name '{metadata['name']}' does not match folder name '{os.path.basename(root)}'")
if "description" not in metadata:
if "description" not in metadata or metadata["description"] is None:
errors.append(f"{rel_path}: Missing 'description' in frontmatter")
else:
# agentskills-ref checks for short descriptions
if len(metadata["description"]) > 200:
errors.append(f"{rel_path}: Description is oversized ({len(metadata['description'])} chars). Must be concise.")
desc = metadata["description"]
if not isinstance(desc, str):
errors.append(f"{rel_path}: 'description' must be a string, got {type(desc).__name__}")
elif len(desc) > 300: # increased limit for multi-line support
errors.append(f"{rel_path}: Description is oversized ({len(desc)} chars). Must be concise.")
# Risk Validation (Quality Bar)
if "risk" not in metadata: