feat: implement Phase 2 Automation & CI (validate_skills, generate_index, ci.yml) [skip ci]

This commit is contained in:
sck_0
2026-01-25 19:19:51 +01:00
parent 1557826c5d
commit 4fe8a1e6a4
4 changed files with 1210 additions and 324 deletions

View File

@@ -2,69 +2,90 @@ import os
import json
import re
def parse_frontmatter(content):
"""
Simple frontmatter parser using regex (consistent with validate_skills.py).
"""
fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
if not fm_match:
return {}
fm_text = fm_match.group(1)
metadata = {}
for line in fm_text.split('\n'):
if ':' in line:
key, val = line.split(':', 1)
metadata[key.strip()] = val.strip().strip('"').strip("'")
return metadata
def generate_index(skills_dir, output_file):
print(f"🏗️ Generating index from: {skills_dir}")
skills = []
for root, dirs, files in os.walk(skills_dir):
# Skip .disabled directories
dirs[:] = [d for d in dirs if d != '.disabled']
# Skip .disabled or hidden directories
dirs[:] = [d for d in dirs if not d.startswith('.')]
if "SKILL.md" in files:
skill_path = os.path.join(root, "SKILL.md")
dir_name = os.path.basename(root)
parent_dir = os.path.basename(os.path.dirname(root))
# Default values
skill_info = {
"id": dir_name,
"path": os.path.relpath(root, os.path.dirname(skills_dir)),
"category": parent_dir if parent_dir != "skills" else "uncategorized",
"name": dir_name.replace("-", " ").title(),
"description": ""
"description": "",
"risk": "unknown",
"source": "unknown"
}
with open(skill_path, 'r', encoding='utf-8') as f:
content = f.read()
# Try to extract from frontmatter first
fm_match = re.search(r'^---\s*(.*?)\s*---', content, re.DOTALL)
if fm_match:
fm_content = fm_match.group(1)
name_fm = re.search(r'^name:\s*(.+)$', fm_content, re.MULTILINE)
desc_fm = re.search(r'^description:\s*(.+)$', fm_content, re.MULTILINE)
if name_fm:
skill_info["name"] = name_fm.group(1).strip()
if desc_fm:
skill_info["description"] = desc_fm.group(1).strip()
# Fallback to Header and First Paragraph if needed
if not skill_info["description"] or skill_info["description"] == "":
name_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
if name_match and not fm_match: # Only override if no frontmatter name
skill_info["name"] = name_match.group(1).strip()
# Extract first paragraph
body = content
if fm_match:
body = content[fm_match.end():].strip()
lines = body.split('\n')
desc_lines = []
for line in lines:
if line.startswith('#') or not line.strip():
if desc_lines: break
continue
desc_lines.append(line.strip())
if desc_lines:
skill_info["description"] = " ".join(desc_lines)[:150] + "..."
try:
with open(skill_path, 'r', encoding='utf-8') as f:
content = f.read()
except Exception as e:
print(f"⚠️ Error reading {skill_path}: {e}")
continue
# Parse Metadata
metadata = parse_frontmatter(content)
# Merge Metadata
if "name" in metadata: skill_info["name"] = metadata["name"]
if "description" in metadata: skill_info["description"] = metadata["description"]
if "risk" in metadata: skill_info["risk"] = metadata["risk"]
if "source" in metadata: skill_info["source"] = metadata["source"]
# Fallback for description if missing in frontmatter (legacy support)
if not skill_info["description"]:
body = content
fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
if fm_match:
body = content[fm_match.end():].strip()
# Simple extraction of first non-header paragraph
lines = body.split('\n')
desc_lines = []
for line in lines:
if line.startswith('#') or not line.strip():
if desc_lines: break
continue
desc_lines.append(line.strip())
if desc_lines:
skill_info["description"] = " ".join(desc_lines)[:250].strip()
skills.append(skill_info)
# Sort validation: by name
skills.sort(key=lambda x: x["name"].lower())
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(skills, f, indent=2)
print(f"✅ Generated index with {len(skills)} skills at: {output_file}")
print(f"✅ Generated rich index with {len(skills)} skills at: {output_file}")
return skills
if __name__ == "__main__":