Files
antigravity-skills-reference/skills/agent-orchestrator/scripts/scan_registry.py
ProgramadorBrasil 61ec71c5c7 feat: add 52 specialized AI agent skills (#217)
New skills covering 10 categories:

**Security & Audit**: 007 (STRIDE/PASTA/OWASP), cred-omega (secrets management)
**AI Personas**: Karpathy, Hinton, Sutskever, LeCun (4 sub-skills), Altman, Musk, Gates, Jobs, Buffett
**Multi-agent Orchestration**: agent-orchestrator, task-intelligence, multi-advisor
**Code Analysis**: matematico-tao (Terence Tao-inspired mathematical code analysis)
**Social & Messaging**: Instagram Graph API, Telegram Bot, WhatsApp Cloud API, social-orchestrator
**Image Generation**: AI Studio (Gemini), Stability AI, ComfyUI Gateway, image-studio router
**Brazilian Domain**: 6 auction specialist modules, 2 legal advisors, auctioneers data scraper
**Product & Growth**: design, invention, monetization, analytics, growth engine
**DevOps & LLM Ops**: Docker/CI-CD/AWS, RAG/embeddings/fine-tuning
**Skill Governance**: installer, sentinel auditor, context management

Each skill includes:
- Standardized YAML frontmatter (name, description, risk, source, tags, tools)
- Structured sections (Overview, When to Use, How it Works, Best Practices)
- Python scripts and reference documentation where applicable
- Cross-platform compatibility (Claude Code, Antigravity, Cursor, Gemini CLI, Codex CLI)

Co-authored-by: ProgramadorBrasil <214873561+ProgramadorBrasil@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 10:04:07 +01:00

509 lines
18 KiB
Python

#!/usr/bin/env python3
"""
Auto-Discovery Engine for Agent Orchestrator.
Scans the skills ecosystem for SKILL.md files, parses metadata,
and maintains a centralized registry (registry.json).
Features:
- Runs automatically on every request (called by CLAUDE.md)
- Ultra-fast via MD5 hash caching (~<100ms when nothing changed)
- Auto-includes new skills, auto-removes deleted skills
- Zero manual intervention required
Usage:
python scan_registry.py # Quick scan (hash-based)
python scan_registry.py --status # Verbose status table
python scan_registry.py --force # Full re-scan ignoring hashes
"""
import os
import sys
import json
import hashlib
import re
from pathlib import Path
from datetime import datetime
# ── Configuration ──────────────────────────────────────────────────────────
# Resolve paths relative to this script's location so the scanner works
# regardless of the current working directory.
_SCRIPT_DIR = Path(__file__).resolve().parent
ORCHESTRATOR_DIR = _SCRIPT_DIR.parent   # .../skills/agent-orchestrator
SKILLS_ROOT = ORCHESTRATOR_DIR.parent   # ecosystem root containing all skills
DATA_DIR = ORCHESTRATOR_DIR / "data"
REGISTRY_PATH = DATA_DIR / "registry.json"        # canonical skill registry
HASHES_PATH = DATA_DIR / "registry_hashes.json"   # MD5 cache enabling fast rescans
# Where to search for SKILL.md files
SEARCH_PATHS = [
    SKILLS_ROOT / ".claude" / "skills",  # registered skills
    SKILLS_ROOT,                         # top-level standalone
]
MAX_DEPTH = 3  # max directory depth for SKILL.md search
# Capability keyword mapping (PT + EN).
# Each capability tag maps to the keywords (Portuguese and English) that mark
# a skill as providing that capability when they appear in its description.
# Single-word keywords are matched as whole words; multi-word phrases are
# matched as substrings (see extract_capabilities / extract_triggers).
CAPABILITY_MAP = {
    "data-extraction": [
        "scrape", "extract", "crawl", "parse", "harvest", "collect",
        "raspar", "extrair", "coletar", "dados",
    ],
    "messaging": [
        "whatsapp", "message", "send", "chat", "notification", "sms",
        "mensagem", "enviar", "notificacao", "atendimento",
    ],
    "social-media": [
        "instagram", "facebook", "twitter", "post", "stories", "reels",
        "social", "engagement", "feed", "follower",
    ],
    "government-data": [
        "junta", "leiloeiro", "cadastro", "governo", "comercial",
        "tribunal", "diario oficial", "certidao", "registro",
    ],
    "web-automation": [
        "browser", "selenium", "playwright", "automate", "click",
        "navegador", "automatizar", "automacao",
    ],
    "api-integration": [
        "api", "endpoint", "webhook", "rest", "graph", "oauth",
        "integracao", "integrar",
    ],
    "analytics": [
        "insight", "analytics", "metrics", "dashboard", "report",
        "relatorio", "metricas", "analise",
    ],
    "content-management": [
        "publish", "schedule", "template", "content", "media",
        "publicar", "agendar", "conteudo", "midia",
    ],
    # Brazilian legal domain: intentionally broad keyword coverage across
    # family, criminal, labor, tax, corporate and procedural law.
    "legal": [
        "advogado", "direito", "juridico", "lei", "processo",
        "acao", "peticao", "recurso", "sentenca", "juiz",
        "divorcio", "guarda", "alimentos", "pensao", "alimenticia", "inventario", "heranca", "partilha",
        "acidente de trabalho", "acidente",
        "familia", "criminal", "penal", "crime", "feminicidio", "maria da penha",
        "violencia domestica", "medida protetiva", "stalking",
        "danos morais", "responsabilidade civil", "indenizacao", "dano",
        "consumidor", "cdc", "plano de saude",
        "trabalhista", "clt", "rescisao", "fgts", "horas extras",
        "previdenciario", "aposentadoria", "aposentar", "inss",
        "imobiliario", "usucapiao", "despejo", "inquilinato",
        "alienacao fiduciaria", "bem de familia",
        "tributario", "imposto", "icms", "execucao fiscal",
        "administrativo", "licitacao", "improbidade", "mandado de seguranca",
        "empresarial", "societario", "falencia", "recuperacao judicial",
        "empresa", "ltda", "cnpj", "mei", "eireli", "contrato social",
        "contrato", "clausula", "contestacao", "apelacao", "agravo",
        "habeas corpus", "mandado", "liminar", "tutela",
        "cpc", "stj", "stf", "sumula", "jurisprudencia",
        "oab", "honorarios", "custas",
    ],
    # Brazilian judicial/extrajudicial auction vocabulary.
    "auction": [
        "leilao", "leilao judicial", "leilao extrajudicial", "hasta publica",
        "arrematacao", "arrematar", "arrematante", "lance", "desagio",
        "edital leilao", "penhora", "adjudicacao", "praca",
        "imissao na posse", "carta arrematacao", "vil preco",
        "avaliacao imovel", "laudo", "perito", "matricula",
        "leiloeiro", "comissao leiloeiro",
    ],
    "security": [
        "seguranca", "security", "owasp", "vulnerability", "incident",
        "pentest", "firewall", "malware", "phishing", "cve",
        "autenticacao", "criptografia", "encryption",
    ],
    "image-generation": [
        "imagem", "image", "gerar imagem", "generate image",
        "stable diffusion", "comfyui", "midjourney", "dall-e",
        "foto", "ilustracao", "arte", "design",
    ],
    "monitoring": [
        "monitor", "monitorar", "health", "status",
        "audit", "auditoria", "sentinel", "check",
    ],
    "context-management": [
        "contexto", "context", "sessao", "session", "compactacao", "compaction",
        "comprimir", "compress", "snapshot", "checkpoint", "briefing",
        "continuidade", "continuity", "preservar", "preserve",
        "memoria", "memory", "resumo", "summary",
        "salvar estado", "save state", "context window", "janela de contexto",
        "perda de dados", "data loss", "backup",
    ],
}
# ── Utility Functions ──────────────────────────────────────────────────────
def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of *path*, read in 8 KiB chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        while chunk := handle.read(8192):
            digest.update(chunk)
    return digest.hexdigest()
def parse_yaml_frontmatter(path: Path) -> dict:
    """Extract the YAML frontmatter block from a SKILL.md file.

    Returns an empty dict when the file is unreadable or has no
    ``---``-delimited frontmatter at the very start.  When PyYAML is
    unavailable (or the block fails to parse), falls back to a minimal
    line-based parser that recovers only the ``name``, ``description``
    and ``version`` keys, including ``>`` / ``>-`` folded multi-line
    values (joined into a single space-separated line).
    """
    try:
        text = path.read_text(encoding="utf-8")
    except Exception:
        # Unreadable file (permissions, encoding, ...): treat as no metadata.
        return {}
    # Frontmatter must start at the first byte; DOTALL lets the body span lines.
    match = re.match(r"^---\s*\n(.*?)\n---", text, re.DOTALL)
    if not match:
        return {}
    try:
        # Imported lazily so the scanner still works without PyYAML installed.
        import yaml
        return yaml.safe_load(match.group(1)) or {}
    except Exception:
        # Fallback: manual parsing for name/description/version only.
        result = {}
        block = match.group(1)
        for key in ("name", "description", "version"):
            # Single-line value, optionally wrapped in quotes.
            m = re.search(rf'^{key}:\s*["\']?(.+?)["\']?\s*$', block, re.MULTILINE)
            if m:
                result[key] = m.group(1).strip()
            else:
                # Handle multi-line description with >- or > (folded scalar):
                # capture the indented continuation lines that follow the key.
                m2 = re.search(rf'^{key}:\s*>-?\s*\n((?:\s+.+\n?)+)', block, re.MULTILINE)
                if m2:
                    lines = m2.group(1).strip().split("\n")
                    result[key] = " ".join(line.strip() for line in lines)
        return result
def find_skill_files() -> list[Path]:
    """Locate every SKILL.md under the configured search paths.

    Descends at most MAX_DEPTH directory levels below each search root and
    never indexes the orchestrator's own skill directory.  Results are
    deduplicated (roots overlap) and returned sorted for determinism.
    """
    discovered: set[Path] = set()
    for base in SEARCH_PATHS:
        if not base.exists():
            continue
        for root, dirs, files in os.walk(base):
            root_path = Path(root)
            # Past the depth budget: prune the walk and move on.
            if len(root_path.relative_to(base).parts) > MAX_DEPTH:
                dirs.clear()
                continue
            # The orchestrator must not register itself.
            if "agent-orchestrator" in root_path.parts:
                continue
            if "SKILL.md" in files:
                discovered.add(root_path / "SKILL.md")
    return sorted(discovered)
def detect_language(skill_dir: Path) -> str:
    """Report the primary implementation language of a skill's scripts/.

    Returns "python", "nodejs", "bash", or "none" when there is no
    scripts/ directory or no recognized file extensions inside it.
    """
    scripts_dir = skill_dir / "scripts"
    if not scripts_dir.exists():
        return "none"
    suffixes = {
        entry.suffix.lower()
        for entry in scripts_dir.rglob("*")
        if entry.is_file()
    }
    # Priority order: Python wins over Node, which wins over shell.
    if ".py" in suffixes:
        return "python"
    if suffixes & {".ts", ".js"}:
        return "nodejs"
    if ".sh" in suffixes:
        return "bash"
    return "none"
def extract_capabilities(description: str) -> list[str]:
    """Derive sorted capability tags for *description* via keyword matching.

    Multi-word keywords match as substrings; single words must match a
    whole word (word boundaries via a Latin-letter tokenization).
    """
    if not description:
        return []
    text = description.lower()
    words = set(re.findall(r'[a-zA-ZÀ-ÿ]+', text))

    def _mentions(keyword: str) -> bool:
        # Phrase -> substring test; single word -> exact token test.
        return keyword in text if " " in keyword else keyword in words

    matched = [
        capability
        for capability, keywords in CAPABILITY_MAP.items()
        if any(_mentions(kw) for kw in keywords)
    ]
    return sorted(matched)
def extract_triggers(description: str) -> list[str]:
    """List every known keyword (from any capability) found in *description*.

    Uses the same matching rules as extract_capabilities: substring tests
    for multi-word phrases, whole-word tests for single keywords.  The
    result is sorted because the vocabulary is iterated in sorted order.
    """
    if not description:
        return []
    # Union of every keyword across all capability categories.
    vocabulary = set()
    for keywords in CAPABILITY_MAP.values():
        vocabulary.update(keywords)
    text = description.lower()
    words = set(re.findall(r'[a-zA-ZÀ-ÿ]+', text))
    return [
        kw
        for kw in sorted(vocabulary)
        if (kw in text if " " in kw else kw in words)
    ]
def assess_status(skill_dir: Path) -> str:
    """Classify a skill directory as "active", "incomplete", or "missing".

    A skill is "active" when its SKILL.md frontmatter declares both a
    non-empty name and description, "incomplete" when either is absent,
    and "missing" when there is no SKILL.md at all.
    """
    skill_md = skill_dir / "SKILL.md"
    if not skill_md.exists():
        return "missing"
    # Parse frontmatter to check for the required fields.
    # (Removed dead locals has_scripts/has_refs: they were computed but
    # never used in the status decision.)
    meta = parse_yaml_frontmatter(skill_md)
    if meta.get("name") and meta.get("description"):
        return "active"
    return "incomplete"
def is_registered(skill_dir: Path) -> bool:
    """Return True when *skill_dir* lives under .claude/skills/ (registered)."""
    registered_root = SKILLS_ROOT / ".claude" / "skills"
    # relative_to raises ValueError when skill_dir is outside the root.
    try:
        skill_dir.relative_to(registered_root)
    except ValueError:
        return False
    return True
# ── Main Logic ─────────────────────────────────────────────────────────────
def load_hashes() -> dict:
    """Read the cached MD5 hashes, or {} when the cache is absent/corrupt."""
    if not HASHES_PATH.exists():
        return {}
    try:
        return json.loads(HASHES_PATH.read_text(encoding="utf-8"))
    except Exception:
        # A corrupt cache simply forces a full re-scan on the next run.
        return {}
def save_hashes(hashes: dict):
    """Persist the MD5 hash cache, creating the data/ directory if needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(hashes, indent=2)
    HASHES_PATH.write_text(payload, encoding="utf-8")
def load_registry() -> dict:
    """Read registry.json, falling back to an empty skeleton registry."""
    fallback = {"generated_at": None, "skills_root": str(SKILLS_ROOT), "skills": []}
    if not REGISTRY_PATH.exists():
        return fallback
    try:
        return json.loads(REGISTRY_PATH.read_text(encoding="utf-8"))
    except Exception:
        # A damaged registry is rebuilt from scratch by the next scan.
        return fallback
def save_registry(registry: dict):
    """Stamp *registry* with the current time and write it to registry.json."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    registry["generated_at"] = datetime.now().isoformat()
    serialized = json.dumps(registry, indent=2, ensure_ascii=False)
    REGISTRY_PATH.write_text(serialized, encoding="utf-8")
def build_skill_entry(skill_md_path: Path) -> dict:
    """Build a registry entry dict from a SKILL.md file.

    Capabilities are the union of any explicit ``capabilities`` declared
    in the frontmatter (as a list or comma-separated string) and those
    inferred from the description's keywords.

    Fix: guard against a bare ``capabilities:`` key (YAML yields None)
    or any other non-list value — previously ``auto_caps + None`` raised
    a TypeError and aborted the whole scan.
    """
    skill_dir = skill_md_path.parent
    meta = parse_yaml_frontmatter(skill_md_path)
    description = meta.get("description", "")
    # Support explicit capabilities in frontmatter; normalize to a list of
    # non-empty strings regardless of how the author wrote the field.
    explicit_caps = meta.get("capabilities") or []
    if isinstance(explicit_caps, str):
        explicit_caps = [c.strip() for c in explicit_caps.split(",") if c.strip()]
    elif not isinstance(explicit_caps, list):
        explicit_caps = []
    auto_caps = extract_capabilities(description)
    all_caps = sorted(set(auto_caps + explicit_caps))
    return {
        "name": meta.get("name", skill_dir.name),
        "description": description,
        "version": meta.get("version", ""),
        "location": str(skill_dir),
        "skill_md": str(skill_md_path),
        "registered": is_registered(skill_dir),
        "has_scripts": (skill_dir / "scripts").exists(),
        "has_references": (skill_dir / "references").exists(),
        "has_data": (skill_dir / "data").exists(),
        "capabilities": all_caps,
        "triggers": extract_triggers(description),
        "language": detect_language(skill_dir),
        "status": assess_status(skill_dir),
        "last_modified": datetime.fromtimestamp(
            skill_md_path.stat().st_mtime
        ).isoformat(),
    }
def scan(force: bool = False) -> dict:
    """
    Main scan function: incrementally refresh the skill registry.

    With hash caching:
    1. Find all SKILL.md files
    2. Compare MD5 hashes with stored values
    3. Only re-parse files that changed, were added, or removed
    4. Update registry incrementally

    :param force: when True, rebuild every entry regardless of hashes.
    :return: the (possibly updated) registry dict.
    """
    current_files = find_skill_files()
    current_paths = {str(f): f for f in current_files}
    stored_hashes = load_hashes()
    registry = load_registry()
    # Build lookup of existing registry entries by skill_md path.
    existing_by_path = {}
    for entry in registry.get("skills", []):
        existing_by_path[entry.get("skill_md", "")] = entry
    # Compute current hashes; rebuild only the entries whose file changed.
    new_hashes = {}
    changed = False
    for path_str, path_obj in current_paths.items():
        current_hash = md5_file(path_obj)
        new_hashes[path_str] = current_hash
        if force or path_str not in stored_hashes or stored_hashes[path_str] != current_hash:
            # New or modified - rebuild entry
            entry = build_skill_entry(path_obj)
            existing_by_path[path_str] = entry
            changed = True
    # Detect removed skills (present in registry, absent on disk).
    # The "" key is the synthetic slot for entries missing skill_md; keep it.
    for old_path in list(existing_by_path.keys()):
        if old_path not in current_paths and old_path != "":
            del existing_by_path[old_path]
            changed = True
    # Check if file set changed (additions/removals)
    if set(new_hashes.keys()) != set(stored_hashes.keys()):
        changed = True
    # Deduplicate by skill name (case-insensitive).
    # When the same skill exists in both skills/ and .claude/skills/,
    # prefer the primary location (skills/) over the registered copy.
    if changed or not REGISTRY_PATH.exists():
        by_name = {}
        for entry in existing_by_path.values():
            name = entry.get("name", "").lower()
            if not name:
                continue  # entries without a name cannot be deduplicated
            if name not in by_name:
                by_name[name] = entry
            else:
                # Prefer the version NOT in .claude/skills/ (the primary source)
                existing = by_name[name]
                existing_is_registered = existing.get("registered", False)
                new_is_registered = entry.get("registered", False)
                if existing_is_registered and not new_is_registered:
                    by_name[name] = entry
                # If both are primary or both registered, keep first found
        registry["skills"] = sorted(by_name.values(), key=lambda s: s.get("name", ""))
        save_registry(registry)
        save_hashes(new_hashes)
        return registry
    else:
        # Nothing changed: return the registry exactly as loaded from disk.
        return registry
def print_status(registry: dict):
    """Render the registry as a human-readable status table on stdout.

    Prints a header, one row per skill (sorted by name), a total line,
    and follow-up warnings for unregistered or incomplete skills.
    """
    skills = registry.get("skills", [])
    if not skills:
        print("No skills found in the ecosystem.")
        return
    bar = "=" * 80
    print(f"\n{bar}")
    print(f" Agent Orchestrator - Skill Registry Status")
    print(f" Scanned at: {registry.get('generated_at', 'N/A')}")
    print(f" Root: {registry.get('skills_root', 'N/A')}")
    print(f"{bar}\n")
    # Column header and separator row.
    print(f" {'Name':<22} {'Status':<12} {'Lang':<10} {'Registered':<12} {'Capabilities'}")
    print(f" {'-'*22} {'-'*12} {'-'*10} {'-'*12} {'-'*30}")
    for skill in sorted(skills, key=lambda x: x.get("name", "")):
        row = (
            skill.get("name", "?")[:20],
            skill.get("status", "?"),
            skill.get("language", "none"),
            "Yes" if skill.get("registered") else "No",
            ", ".join(skill.get("capabilities", []))[:30],
        )
        print(f" {row[0]:<22} {row[1]:<12} {row[2]:<10} {row[3]:<12} {row[4]}")
    print(f"\n Total: {len(skills)} skills")
    # Actionable follow-ups.
    unregistered = [s for s in skills if not s.get("registered")]
    incomplete = [s for s in skills if s.get("status") == "incomplete"]
    if unregistered:
        print(f"\n [!] {len(unregistered)} skill(s) not registered in .claude/skills/:")
        for s in unregistered:
            print(f" - {s['name']} ({s['location']})")
    if incomplete:
        print(f"\n [!] {len(incomplete)} skill(s) with incomplete status:")
        for s in incomplete:
            print(f" - {s['name']} ({s['location']})")
    print()
# ── CLI Entry Point ────────────────────────────────────────────────────────
def main():
    """CLI entry point: scan, then emit either a status table or JSON summary."""
    argv = sys.argv
    registry = scan(force="--force" in argv)
    if "--status" in argv:
        print_status(registry)
        return
    # Default: output JSON summary for Claude to parse.
    skills = registry.get("skills", [])
    summary = {
        "total": len(skills),
        "active": sum(1 for s in skills if s.get("status") == "active"),
        "incomplete": sum(1 for s in skills if s.get("status") == "incomplete"),
        "skills": [
            {
                "name": s.get("name"),
                "status": s.get("status"),
                "capabilities": s.get("capabilities", []),
            }
            for s in skills
        ],
    }
    print(json.dumps(summary, indent=2, ensure_ascii=False))
if __name__ == "__main__":
main()