* chore: upgrade maintenance scripts to robust PyYAML parsing - Replaces fragile regex frontmatter parsing with PyYAML/yaml library - Ensures multi-line descriptions and complex characters are handled safely - Normalizes quoting and field ordering across all maintenance scripts - Updates validator to strictly enforce description quality * fix: restore and refine truncated skill descriptions - Recovered 223+ truncated descriptions from git history (6.5.0 regression) - Refined long descriptions into concise, complete sentences (<200 chars) - Added missing descriptions for brainstorming and orchestration skills - Manually fixed imagen skill description - Resolved dangling links in competitor-alternatives skill * chore: sync generated registry files and document fixes - Regenerated skills index with normalized forward-slash paths - Updated README and CATALOG to reflect restored descriptions - Documented restoration and script improvements in CHANGELOG.md * fix: restore missing skill and align metadata for full 955 count - Renamed SKILL.MD to SKILL.md in andruia-skill-smith to ensure indexing - Fixed risk level and missing section in andruia-skill-smith - Synchronized all registry files for final 955 skill count * chore(scripts): add cross-platform runners and hermetic test orchestration * fix(scripts): harden utf-8 output and clone target writeability * fix(skills): add missing date metadata for strict validation * chore(index): sync generated metadata dates * fix(catalog): normalize skill paths to prevent CI drift * chore: sync generated registry files * fix: enforce LF line endings for generated registry files
133 lines
5.2 KiB
Python
133 lines
5.2 KiB
Python
import os
|
|
import json
|
|
import re
|
|
import sys
|
|
|
|
import yaml
|
|
|
|
# Ensure UTF-8 output for Windows compatibility.
# The streams are switched to UTF-8 in place where possible, so their existing
# error handler and buffering settings are kept; a fresh wrapper is only built
# for stream objects that cannot be reconfigured.
if sys.platform == 'win32':
    import io

    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name)
        if hasattr(_stream, 'reconfigure'):
            _stream.reconfigure(encoding='utf-8')
        elif hasattr(_stream, 'buffer'):
            setattr(sys, _stream_name, io.TextIOWrapper(_stream.buffer, encoding='utf-8'))
        # Streams with neither attribute (e.g. None) are left untouched.
    del _stream_name, _stream
|
|
|
|
def parse_frontmatter(content):
    """
    Parse the YAML frontmatter at the very start of a document.

    The frontmatter is the text between a leading ``---`` line and the next
    ``---`` line. Before parsing, every ``key: value`` line whose value
    contains ``@`` and is not already quoted is rewritten with the whole value
    wrapped in double quotes, so single values and comma-separated lists
    containing ``@`` are parsed as one string.

    Args:
        content: Full text of the document.

    Returns:
        The frontmatter as a dict. An empty dict is returned when the document
        has no frontmatter, when the frontmatter is empty or is not a mapping,
        or when YAML parsing fails (a warning is printed in that case).
    """
    fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if not fm_match:
        return {}

    # Process line by line to handle values containing @ and commas
    sanitized_lines = []
    for line in fm_match.group(1).splitlines():
        # Match "key: value" (handles keys with dashes like 'package-name')
        match = re.match(r'^(\s*[\w-]+):\s*(.*)$', line)
        if match:
            key, val = match.groups()
            val_s = val.strip()
            if '@' in val_s and not val_s.startswith(('"', "'")):
                # Escape backslashes first, then double quotes: inside a
                # double-quoted YAML scalar a backslash starts an escape
                # sequence, so a literal one would otherwise be altered or
                # make the document unparseable.
                safe_val = val_s.replace('\\', '\\\\').replace('"', '\\"')
                line = f'{key}: "{safe_val}"'
        sanitized_lines.append(line)

    try:
        parsed = yaml.safe_load('\n'.join(sanitized_lines))
    except yaml.YAMLError as e:
        print(f"⚠️ YAML parsing error: {e}")
        return {}

    # Callers look fields up by key, so a bare scalar or list (valid YAML, but
    # not a mapping) is treated the same as missing frontmatter.
    return parsed if isinstance(parsed, dict) else {}
|
|
|
|
def _fallback_description(content):
    """Return the first non-heading paragraph after the frontmatter, capped at 250 characters."""
    body = content
    fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if fm_match:
        body = content[fm_match.end():].strip()

    desc_lines = []
    for line in body.split('\n'):
        if line.startswith('#') or not line.strip():
            # A heading or blank line ends the paragraph once it has started.
            if desc_lines:
                break
            continue
        desc_lines.append(line.strip())

    return " ".join(desc_lines)[:250].strip()


def _json_safe_date(value):
    """Return date/datetime values as ISO 8601 strings; pass anything else through unchanged."""
    from datetime import date

    # datetime is a subclass of date, so this covers both.
    if isinstance(value, date):
        return value.isoformat()
    return value


def generate_index(skills_dir, output_file):
    """
    Build a JSON index of every skill found under ``skills_dir``.

    A skill is any directory that contains a ``SKILL.md`` file; directories
    whose name starts with ``.`` are not descended into. Each entry holds
    ``id``, ``path``, ``category``, ``name``, ``description``, ``risk``,
    ``source`` and ``date_added``. Defaults are derived from the directory
    layout and overridden by the file's frontmatter when present. Entries are
    sorted by lower-cased name, then id, and written to ``output_file`` as
    UTF-8 JSON with LF line endings.

    Args:
        skills_dir: Directory to scan for skills.
        output_file: Path of the JSON file to write.

    Returns:
        The sorted list of skill dicts that was written.
    """
    print(f"🏗️ Generating index from: {skills_dir}")
    skills = []

    # normpath drops a trailing separator; without it dirname() would return
    # skills_dir itself and the top-level folder would vanish from "path".
    index_root = os.path.dirname(os.path.normpath(skills_dir))

    for root, dirs, files in os.walk(skills_dir):
        # Skip .disabled or hidden directories (pruned in place so os.walk
        # never descends into them).
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        if "SKILL.md" not in files:
            continue

        skill_path = os.path.join(root, "SKILL.md")
        dir_name = os.path.basename(root)
        parent_dir = os.path.basename(os.path.dirname(root))
        rel_path = os.path.relpath(root, index_root)

        # Default values
        skill_info = {
            "id": dir_name,
            # Force forward slashes for cross-platform JSON compatibility
            "path": rel_path.replace(os.sep, '/'),
            # Will be overridden by frontmatter if present
            "category": parent_dir if parent_dir != "skills" else None,
            "name": dir_name.replace("-", " ").title(),
            "description": "",
            "risk": "unknown",
            "source": "unknown",
            "date_added": None
        }

        try:
            with open(skill_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            # Best effort: an unreadable skill is reported and left out.
            print(f"⚠️ Error reading {skill_path}: {e}")
            continue

        # Parse Metadata
        metadata = parse_frontmatter(content)

        # Merge Metadata (frontmatter takes priority)
        for field in ("name", "description", "risk", "source"):
            if field in metadata:
                skill_info[field] = metadata[field]
        if "date_added" in metadata:
            # An unquoted ISO date in YAML is loaded as a date object, which
            # json.dump cannot serialize; store it as a string instead.
            skill_info["date_added"] = _json_safe_date(metadata["date_added"])

        # Category: prefer frontmatter, then folder structure, then default
        if "category" in metadata:
            skill_info["category"] = metadata["category"]
        elif skill_info["category"] is None:
            skill_info["category"] = "uncategorized"

        # Fallback for description if missing in frontmatter (legacy support)
        if not skill_info["description"]:
            fallback = _fallback_description(content)
            if fallback:
                skill_info["description"] = fallback

        skills.append(skill_info)

    # Sort by name, then id. str() keeps the sort from crashing when the
    # frontmatter supplies a non-string name (e.g. a number or null).
    skills.sort(key=lambda x: (str(x["name"]).lower(), x["id"].lower()))

    with open(output_file, 'w', encoding='utf-8', newline='\n') as f:
        json.dump(skills, f, indent=2)

    print(f"✅ Generated rich index with {len(skills)} skills at: {output_file}")
    return skills
|
|
|
|
if __name__ == "__main__":
    # The base directory is the parent of the directory that holds this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_dir = os.path.dirname(script_dir)

    # Scan <base>/skills and write the index next to it as <base>/skills_index.json.
    generate_index(
        os.path.join(project_dir, "skills"),
        os.path.join(project_dir, "skills_index.json"),
    )
|