Files
antigravity-skills-reference/scripts/generate_index.py
Ares 4a5f1234bb fix: harden registry tooling, make tests hermetic, and restore metadata consistency (#168)
* chore: upgrade maintenance scripts to robust PyYAML parsing

- Replaces fragile regex frontmatter parsing with PyYAML/yaml library
- Ensures multi-line descriptions and complex characters are handled safely
- Normalizes quoting and field ordering across all maintenance scripts
- Updates validator to strictly enforce description quality

* fix: restore and refine truncated skill descriptions

- Recovered 223+ truncated descriptions from git history (6.5.0 regression)
- Refined long descriptions into concise, complete sentences (<200 chars)
- Added missing descriptions for brainstorming and orchestration skills
- Manually fixed imagen skill description
- Resolved dangling links in competitor-alternatives skill

* chore: sync generated registry files and document fixes

- Regenerated skills index with normalized forward-slash paths
- Updated README and CATALOG to reflect restored descriptions
- Documented restoration and script improvements in CHANGELOG.md

* fix: restore missing skill and align metadata for full 955 count

- Renamed SKILL.MD to SKILL.md in andruia-skill-smith to ensure indexing
- Fixed risk level and missing section in andruia-skill-smith
- Synchronized all registry files for final 955 skill count

* chore(scripts): add cross-platform runners and hermetic test orchestration

* fix(scripts): harden utf-8 output and clone target writeability

* fix(skills): add missing date metadata for strict validation

* chore(index): sync generated metadata dates

* fix(catalog): normalize skill paths to prevent CI drift

* chore: sync generated registry files

* fix: enforce LF line endings for generated registry files
2026-03-01 09:38:25 +01:00

133 lines
5.2 KiB
Python

import os
import json
import re
import sys
import yaml
# Ensure UTF-8 output for Windows compatibility
if sys.platform == 'win32':
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
def parse_frontmatter(content):
"""
Parses YAML frontmatter, sanitizing unquoted values containing @.
Handles single values and comma-separated lists by quoting the entire line.
"""
fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
if not fm_match:
return {}
yaml_text = fm_match.group(1)
# Process line by line to handle values containing @ and commas
sanitized_lines = []
for line in yaml_text.splitlines():
# Match "key: value" (handles keys with dashes like 'package-name')
match = re.match(r'^(\s*[\w-]+):\s*(.*)$', line)
if match:
key, val = match.groups()
val_s = val.strip()
# If value contains @ and isn't already quoted, wrap the whole string in double quotes
if '@' in val_s and not (val_s.startswith('"') or val_s.startswith("'")):
# Escape any existing double quotes within the value string
safe_val = val_s.replace('"', '\\"')
line = f'{key}: "{safe_val}"'
sanitized_lines.append(line)
sanitized_yaml = '\n'.join(sanitized_lines)
try:
return yaml.safe_load(sanitized_yaml) or {}
except yaml.YAMLError as e:
print(f"⚠️ YAML parsing error: {e}")
return {}
def generate_index(skills_dir, output_file):
print(f"🏗️ Generating index from: {skills_dir}")
skills = []
for root, dirs, files in os.walk(skills_dir):
# Skip .disabled or hidden directories
dirs[:] = [d for d in dirs if not d.startswith('.')]
if "SKILL.md" in files:
skill_path = os.path.join(root, "SKILL.md")
dir_name = os.path.basename(root)
parent_dir = os.path.basename(os.path.dirname(root))
# Default values
rel_path = os.path.relpath(root, os.path.dirname(skills_dir))
# Force forward slashes for cross-platform JSON compatibility
skill_info = {
"id": dir_name,
"path": rel_path.replace(os.sep, '/'),
"category": parent_dir if parent_dir != "skills" else None, # Will be overridden by frontmatter if present
"name": dir_name.replace("-", " ").title(),
"description": "",
"risk": "unknown",
"source": "unknown",
"date_added": None
}
try:
with open(skill_path, 'r', encoding='utf-8') as f:
content = f.read()
except Exception as e:
print(f"⚠️ Error reading {skill_path}: {e}")
continue
# Parse Metadata
metadata = parse_frontmatter(content)
# Merge Metadata (frontmatter takes priority)
if "name" in metadata: skill_info["name"] = metadata["name"]
if "description" in metadata: skill_info["description"] = metadata["description"]
if "risk" in metadata: skill_info["risk"] = metadata["risk"]
if "source" in metadata: skill_info["source"] = metadata["source"]
if "date_added" in metadata: skill_info["date_added"] = metadata["date_added"]
# Category: prefer frontmatter, then folder structure, then default
if "category" in metadata:
skill_info["category"] = metadata["category"]
elif skill_info["category"] is None:
skill_info["category"] = "uncategorized"
# Fallback for description if missing in frontmatter (legacy support)
if not skill_info["description"]:
body = content
fm_match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
if fm_match:
body = content[fm_match.end():].strip()
# Simple extraction of first non-header paragraph
lines = body.split('\n')
desc_lines = []
for line in lines:
if line.startswith('#') or not line.strip():
if desc_lines: break
continue
desc_lines.append(line.strip())
if desc_lines:
skill_info["description"] = " ".join(desc_lines)[:250].strip()
skills.append(skill_info)
# Sort validation: by name
skills.sort(key=lambda x: (x["name"].lower(), x["id"].lower()))
with open(output_file, 'w', encoding='utf-8', newline='\n') as f:
json.dump(skills, f, indent=2)
print(f"✅ Generated rich index with {len(skills)} skills at: {output_file}")
return skills
if __name__ == "__main__":
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
skills_path = os.path.join(base_dir, "skills")
output_path = os.path.join(base_dir, "skills_index.json")
generate_index(skills_path, output_path)