feat: implement Phase 2 Automation & CI (validate_skills, generate_index, ci.yml) [skip ci]

2026-01-25 19:19:51 +01:00
parent 1557826c5d
commit 4fe8a1e6a4
4 changed files with 1210 additions and 324 deletions
--- a/scripts/generate_index.py
+++ b/scripts/generate_index.py
@@ -2,69 +2,90 @@ import os
 import json
 import re

+def parse_frontmatter(content):
+    """
+    Regex-based parser (consistent with validate_skills.py) that returns the leading `---` frontmatter block as a dict of flat `key: value` strings, or {} if there is none.
+    """
+    fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
+    if not fm_match:
+        return {}
+    
+    fm_text = fm_match.group(1)
+    metadata = {}
+    for line in fm_text.split('\n'):
+        if ':' in line:
+            key, val = line.split(':', 1)
+            metadata[key.strip()] = val.strip().strip('"').strip("'")
+    return metadata
+
 def generate_index(skills_dir, output_file):
    print(f"🏗️ Generating index from: {skills_dir}")
    skills = []

    for root, dirs, files in os.walk(skills_dir):
-        # Skip .disabled directories
-        dirs[:] = [d for d in dirs if d != '.disabled']
+        # Skip hidden directories (any name starting with '.', which includes .disabled)
+        dirs[:] = [d for d in dirs if not d.startswith('.')]
+        
        if "SKILL.md" in files:
            skill_path = os.path.join(root, "SKILL.md")
            dir_name = os.path.basename(root)
+            parent_dir = os.path.basename(os.path.dirname(root))
            
+            # Default values
            skill_info = {
                "id": dir_name,
                "path": os.path.relpath(root, os.path.dirname(skills_dir)),
+                "category": parent_dir if parent_dir != "skills" else "uncategorized",
                "name": dir_name.replace("-", " ").title(),
-                "description": ""
+                "description": "",
+                "risk": "unknown",
+                "source": "unknown"
            }
            
-            with open(skill_path, 'r', encoding='utf-8') as f:
-                content = f.read()
-                
-                # Try to extract from frontmatter first
-                fm_match = re.search(r'^---\s*(.*?)\s*---', content, re.DOTALL)
-                if fm_match:
-                    fm_content = fm_match.group(1)
-                    name_fm = re.search(r'^name:\s*(.+)$', fm_content, re.MULTILINE)
-                    desc_fm = re.search(r'^description:\s*(.+)$', fm_content, re.MULTILINE)
-                    
-                    if name_fm:
-                        skill_info["name"] = name_fm.group(1).strip()
-                    if desc_fm:
-                        skill_info["description"] = desc_fm.group(1).strip()
-                
-                # Fallback to Header and First Paragraph if needed
-                if not skill_info["description"] or skill_info["description"] == "":
-                    name_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
-                    if name_match and not fm_match: # Only override if no frontmatter name
-                         skill_info["name"] = name_match.group(1).strip()
-                    
-                    # Extract first paragraph
-                    body = content
-                    if fm_match:
-                        body = content[fm_match.end():].strip()
-                    
-                    lines = body.split('\n')
-                    desc_lines = []
-                    for line in lines:
-                        if line.startswith('#') or not line.strip():
-                            if desc_lines: break
-                            continue
-                        desc_lines.append(line.strip())
-                    
-                    if desc_lines:
-                        skill_info["description"] = " ".join(desc_lines)[:150] + "..."
+            try:
+                with open(skill_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+            except Exception as e:
+                print(f"⚠️ Error reading {skill_path}: {e}")
+                continue
+
+            # Parse Metadata
+            metadata = parse_frontmatter(content)
            
+            # Merge Metadata
+            if "name" in metadata: skill_info["name"] = metadata["name"]
+            if "description" in metadata: skill_info["description"] = metadata["description"]
+            if "risk" in metadata: skill_info["risk"] = metadata["risk"]
+            if "source" in metadata: skill_info["source"] = metadata["source"]
+            
+            # Fallback for description if missing in frontmatter (legacy support)
+            if not skill_info["description"]:
+                body = content
+                fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
+                if fm_match:
+                    body = content[fm_match.end():].strip()
+                
+                # Simple extraction of first non-header paragraph
+                lines = body.split('\n')
+                desc_lines = []
+                for line in lines:
+                    if line.startswith('#') or not line.strip():
+                        if desc_lines: break
+                        continue
+                    desc_lines.append(line.strip())
+                
+                if desc_lines:
+                    skill_info["description"] = " ".join(desc_lines)[:250].strip()
+
            skills.append(skill_info)

+    # Sort skills by name, case-insensitively
    skills.sort(key=lambda x: x["name"].lower())

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(skills, f, indent=2)
    
-    print(f"✅ Generated index with {len(skills)} skills at: {output_file}")
+    print(f"✅ Generated rich index with {len(skills)} skills at: {output_file}")
    return skills

 if __name__ == "__main__":