New skills covering 10 categories: **Security & Audit**: 007 (STRIDE/PASTA/OWASP), cred-omega (secrets management) **AI Personas**: Karpathy, Hinton, Sutskever, LeCun (4 sub-skills), Altman, Musk, Gates, Jobs, Buffett **Multi-agent Orchestration**: agent-orchestrator, task-intelligence, multi-advisor **Code Analysis**: matematico-tao (Terence Tao-inspired mathematical code analysis) **Social & Messaging**: Instagram Graph API, Telegram Bot, WhatsApp Cloud API, social-orchestrator **Image Generation**: AI Studio (Gemini), Stability AI, ComfyUI Gateway, image-studio router **Brazilian Domain**: 6 auction specialist modules, 2 legal advisors, auctioneers data scraper **Product & Growth**: design, invention, monetization, analytics, growth engine **DevOps & LLM Ops**: Docker/CI-CD/AWS, RAG/embeddings/fine-tuning **Skill Governance**: installer, sentinel auditor, context management Each skill includes: - Standardized YAML frontmatter (name, description, risk, source, tags, tools) - Structured sections (Overview, When to Use, How it Works, Best Practices) - Python scripts and reference documentation where applicable - Cross-platform compatibility (Claude Code, Antigravity, Cursor, Gemini CLI, Codex CLI) Co-authored-by: ProgramadorBrasil <214873561+ProgramadorBrasil@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
509 lines
18 KiB
Python
509 lines
18 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Auto-Discovery Engine for Agent Orchestrator.
|
|
|
|
Scans the skills ecosystem for SKILL.md files, parses metadata,
|
|
and maintains a centralized registry (registry.json).
|
|
|
|
Features:
|
|
- Runs automatically on every request (called by CLAUDE.md)
|
|
- Ultra-fast via MD5 hash caching (~<100ms when nothing changed)
|
|
- Auto-includes new skills, auto-removes deleted skills
|
|
- Zero manual intervention required
|
|
|
|
Usage:
|
|
python scan_registry.py # Quick scan (hash-based)
|
|
python scan_registry.py --status # Verbose status table
|
|
python scan_registry.py --force # Full re-scan ignoring hashes
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import hashlib
|
|
import re
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
# ── Configuration ──────────────────────────────────────────────────────────

# Resolve paths relative to this script's location
# (layout: <skills_root>/agent-orchestrator/<this script's dir>/scan_registry.py)
_SCRIPT_DIR = Path(__file__).resolve().parent
ORCHESTRATOR_DIR = _SCRIPT_DIR.parent       # the agent-orchestrator skill itself
SKILLS_ROOT = ORCHESTRATOR_DIR.parent       # root of the whole skills ecosystem
DATA_DIR = ORCHESTRATOR_DIR / "data"        # persisted scanner state lives here
REGISTRY_PATH = DATA_DIR / "registry.json"  # generated skill registry (output)
HASHES_PATH = DATA_DIR / "registry_hashes.json"  # MD5 cache for change detection

# Where to search for SKILL.md files
SEARCH_PATHS = [
    SKILLS_ROOT / ".claude" / "skills",  # registered skills
    SKILLS_ROOT,  # top-level standalone
]
MAX_DEPTH = 3  # max directory depth for SKILL.md search
|
|
|
|
# Capability keyword mapping (PT + EN)
# Maps each capability tag to the keywords that imply it. Matching rules
# (applied by extract_capabilities / extract_triggers): single-word keywords
# must match a whole word of the description; multi-word keywords match as
# plain substrings. Keywords are lowercase, accent-free Portuguese + English.
CAPABILITY_MAP = {
    "data-extraction": [
        "scrape", "extract", "crawl", "parse", "harvest", "collect",
        "raspar", "extrair", "coletar", "dados",
    ],
    "messaging": [
        "whatsapp", "message", "send", "chat", "notification", "sms",
        "mensagem", "enviar", "notificacao", "atendimento",
    ],
    "social-media": [
        "instagram", "facebook", "twitter", "post", "stories", "reels",
        "social", "engagement", "feed", "follower",
    ],
    # Brazilian public-registry / government data sources
    "government-data": [
        "junta", "leiloeiro", "cadastro", "governo", "comercial",
        "tribunal", "diario oficial", "certidao", "registro",
    ],
    "web-automation": [
        "browser", "selenium", "playwright", "automate", "click",
        "navegador", "automatizar", "automacao",
    ],
    "api-integration": [
        "api", "endpoint", "webhook", "rest", "graph", "oauth",
        "integracao", "integrar",
    ],
    "analytics": [
        "insight", "analytics", "metrics", "dashboard", "report",
        "relatorio", "metricas", "analise",
    ],
    "content-management": [
        "publish", "schedule", "template", "content", "media",
        "publicar", "agendar", "conteudo", "midia",
    ],
    # Brazilian legal-domain vocabulary (practice areas, procedure, courts)
    "legal": [
        "advogado", "direito", "juridico", "lei", "processo",
        "acao", "peticao", "recurso", "sentenca", "juiz",
        "divorcio", "guarda", "alimentos", "pensao", "alimenticia", "inventario", "heranca", "partilha",
        "acidente de trabalho", "acidente",
        "familia", "criminal", "penal", "crime", "feminicidio", "maria da penha",
        "violencia domestica", "medida protetiva", "stalking",
        "danos morais", "responsabilidade civil", "indenizacao", "dano",
        "consumidor", "cdc", "plano de saude",
        "trabalhista", "clt", "rescisao", "fgts", "horas extras",
        "previdenciario", "aposentadoria", "aposentar", "inss",
        "imobiliario", "usucapiao", "despejo", "inquilinato",
        "alienacao fiduciaria", "bem de familia",
        "tributario", "imposto", "icms", "execucao fiscal",
        "administrativo", "licitacao", "improbidade", "mandado de seguranca",
        "empresarial", "societario", "falencia", "recuperacao judicial",
        "empresa", "ltda", "cnpj", "mei", "eireli", "contrato social",
        "contrato", "clausula", "contestacao", "apelacao", "agravo",
        "habeas corpus", "mandado", "liminar", "tutela",
        "cpc", "stj", "stf", "sumula", "jurisprudencia",
        "oab", "honorarios", "custas",
    ],
    # Brazilian judicial/extrajudicial auction vocabulary
    "auction": [
        "leilao", "leilao judicial", "leilao extrajudicial", "hasta publica",
        "arrematacao", "arrematar", "arrematante", "lance", "desagio",
        "edital leilao", "penhora", "adjudicacao", "praca",
        "imissao na posse", "carta arrematacao", "vil preco",
        "avaliacao imovel", "laudo", "perito", "matricula",
        "leiloeiro", "comissao leiloeiro",
    ],
    "security": [
        "seguranca", "security", "owasp", "vulnerability", "incident",
        "pentest", "firewall", "malware", "phishing", "cve",
        "autenticacao", "criptografia", "encryption",
    ],
    "image-generation": [
        "imagem", "image", "gerar imagem", "generate image",
        "stable diffusion", "comfyui", "midjourney", "dall-e",
        "foto", "ilustracao", "arte", "design",
    ],
    "monitoring": [
        "monitor", "monitorar", "health", "status",
        "audit", "auditoria", "sentinel", "check",
    ],
    # Session/context preservation features (compaction, snapshots, recovery)
    "context-management": [
        "contexto", "context", "sessao", "session", "compactacao", "compaction",
        "comprimir", "compress", "snapshot", "checkpoint", "briefing",
        "continuidade", "continuity", "preservar", "preserve",
        "memoria", "memory", "resumo", "summary",
        "salvar estado", "save state", "context window", "janela de contexto",
        "perda de dados", "data loss", "backup",
    ],
}
|
|
|
|
# ── Utility Functions ──────────────────────────────────────────────────────
|
|
|
|
def md5_file(path: Path) -> str:
    """Return the hex MD5 digest of *path*'s contents.

    Read in 8 KiB chunks so large files never load fully into memory.
    Used purely as a cheap change-detection fingerprint, not for security.
    """
    digest = hashlib.md5()
    with path.open("rb") as stream:
        while chunk := stream.read(8192):
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
|
|
|
def parse_yaml_frontmatter(path: Path) -> dict:
    """Extract YAML frontmatter from a SKILL.md file.

    Returns a dict of frontmatter fields, or {} when the file is
    unreadable, has no leading ``--- ... ---`` block, or the block does
    not parse to a mapping. When PyYAML is unavailable or yields a
    non-mapping, a minimal regex fallback extracts name/description/
    version, including ``>`` / ``>-`` folded multi-line values.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except Exception:
        # Unreadable (missing, permissions, bad encoding): no metadata.
        return {}

    match = re.match(r"^---\s*\n(.*?)\n---", text, re.DOTALL)
    if not match:
        return {}

    block = match.group(1)

    try:
        import yaml  # optional dependency; regex fallback below if absent
        parsed = yaml.safe_load(block)
        # safe_load can legally return a scalar or a list for malformed
        # frontmatter; callers index the result as a mapping, so accept
        # only dicts here (previously the scalar leaked through as-is).
        if isinstance(parsed, dict):
            return parsed
    except Exception:
        pass

    # Fallback: manual parsing for name/description/version.
    result = {}
    for key in ("name", "description", "version"):
        # Folded (">" / ">-") multi-line values must be tried first: the
        # single-line pattern below would otherwise match the fold
        # indicator itself and capture ">-" as the value.
        m2 = re.search(rf'^{key}:\s*>-?\s*\n((?:\s+.+\n?)+)', block, re.MULTILINE)
        if m2:
            lines = m2.group(1).strip().split("\n")
            result[key] = " ".join(line.strip() for line in lines)
            continue
        m = re.search(rf'^{key}:\s*["\']?(.+?)["\']?\s*$', block, re.MULTILINE)
        if m:
            result[key] = m.group(1).strip()
    return result
|
|
|
|
|
|
def find_skill_files() -> list[Path]:
    """Find all SKILL.md files under SEARCH_PATHS, up to MAX_DEPTH deep.

    Returns a sorted, de-duplicated list — SEARCH_PATHS overlap (the
    skills root contains .claude/skills), so the same file can be
    reached from more than one base. The orchestrator's own directory
    is excluded so the scanner never registers itself.
    """
    found = set()

    for base in SEARCH_PATHS:
        if not base.exists():
            continue
        for root, dirs, files in os.walk(base):
            depth = len(Path(root).relative_to(base).parts)
            if depth > MAX_DEPTH:
                dirs.clear()  # stop descending past the depth limit
                continue

            # Skip the orchestrator itself. Prune the subtree too: every
            # descendant would fail this same check, so walking it only
            # wasted time (results are unchanged by the pruning).
            if "agent-orchestrator" in Path(root).parts:
                dirs.clear()
                continue

            if "SKILL.md" in files:
                found.add(Path(root) / "SKILL.md")

    return sorted(found)
|
|
|
|
|
|
def detect_language(skill_dir: Path) -> str:
    """Detect the primary implementation language of a skill.

    Inspects file extensions under ``scripts/`` with a fixed priority:
    Python, then Node.js (TS/JS), then Bash. Returns "none" when the
    directory is absent or holds nothing recognizable.
    """
    scripts_dir = skill_dir / "scripts"
    if not scripts_dir.exists():
        return "none"

    suffixes = {
        entry.suffix.lower()
        for entry in scripts_dir.rglob("*")
        if entry.is_file()
    }

    if ".py" in suffixes:
        return "python"
    if suffixes & {".ts", ".js"}:
        return "nodejs"
    if ".sh" in suffixes:
        return "bash"
    return "none"
|
|
|
|
|
|
def extract_capabilities(description: str) -> list[str]:
    """Map description keywords to capability tags.

    Single-word keywords must match a whole word of the description;
    multi-word keywords match as plain substrings. Returns the matched
    capability tags in sorted order (empty for a falsy description).
    """
    if not description:
        return []

    lowered = description.lower()
    word_set = set(re.findall(r'[a-zA-ZÀ-ÿ]+', lowered))

    def hit(keyword: str) -> bool:
        # Phrases get substring semantics; single words need an exact
        # word match to avoid false positives inside longer tokens.
        if " " in keyword:
            return keyword in lowered
        return keyword in word_set

    return sorted(
        tag
        for tag, keywords in CAPABILITY_MAP.items()
        if any(hit(kw) for kw in keywords)
    )
|
|
|
|
|
|
def extract_triggers(description: str) -> list[str]:
    """Return every capability keyword present in *description*.

    Scans the union of keywords from all CAPABILITY_MAP categories:
    multi-word keywords match as substrings, single words as whole
    words. Results come back in sorted keyword order.
    """
    if not description:
        return []

    # Flatten every category's keyword list into one vocabulary.
    vocabulary = set()
    for keywords in CAPABILITY_MAP.values():
        vocabulary.update(keywords)

    lowered = description.lower()
    word_set = set(re.findall(r'[a-zA-ZÀ-ÿ]+', lowered))

    return [
        kw
        for kw in sorted(vocabulary)
        if (kw in lowered if " " in kw else kw in word_set)
    ]
|
|
|
|
|
|
def assess_status(skill_dir: Path) -> str:
    """Classify a skill directory's completeness.

    Returns:
        "missing"    - no SKILL.md at all
        "active"     - frontmatter declares both name and description
        "incomplete" - SKILL.md exists but lacks one of the required fields
    """
    skill_md = skill_dir / "SKILL.md"
    if not skill_md.exists():
        return "missing"

    # Only the frontmatter fields decide the status; scripts/ and
    # references/ are optional extras (the old scripts/refs checks were
    # dead locals and have been removed).
    meta = parse_yaml_frontmatter(skill_md)
    if meta.get("name") and meta.get("description"):
        return "active"
    return "incomplete"
|
|
|
|
|
|
def is_registered(skill_dir: Path) -> bool:
    """Return True when *skill_dir* lives under ``<root>/.claude/skills/``."""
    claude_skills = SKILLS_ROOT / ".claude" / "skills"
    # Path containment test; equivalent to catching relative_to's ValueError.
    return skill_dir.is_relative_to(claude_skills)
|
|
|
|
|
|
# ── Main Logic ─────────────────────────────────────────────────────────────
|
|
|
|
def load_hashes() -> dict:
    """Load stored MD5 hashes from registry_hashes.json.

    Returns {} when the cache is absent or unreadable/corrupt — either
    way the scanner simply falls back to a full re-parse.
    """
    if not HASHES_PATH.exists():
        return {}
    try:
        return json.loads(HASHES_PATH.read_text(encoding="utf-8"))
    except Exception:
        return {}
|
|
|
|
|
|
def save_hashes(hashes: dict):
    """Persist *hashes* to registry_hashes.json, creating data/ as needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(hashes, indent=2)
    HASHES_PATH.write_text(serialized, encoding="utf-8")
|
|
|
|
|
|
def load_registry() -> dict:
    """Load the existing registry.json.

    Falls back to an empty registry scaffold when the file is absent or
    cannot be parsed, so callers always get the expected shape.
    """
    fallback = {"generated_at": None, "skills_root": str(SKILLS_ROOT), "skills": []}
    if not REGISTRY_PATH.exists():
        return fallback
    try:
        return json.loads(REGISTRY_PATH.read_text(encoding="utf-8"))
    except Exception:
        return fallback
|
|
|
|
|
|
def save_registry(registry: dict):
    """Timestamp *registry* and write it to registry.json.

    Mutates the passed dict: "generated_at" is set to a local-time ISO
    timestamp (naive — no timezone offset) before serialization.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    registry["generated_at"] = datetime.now().isoformat()
    payload = json.dumps(registry, indent=2, ensure_ascii=False)
    REGISTRY_PATH.write_text(payload, encoding="utf-8")
|
|
|
|
|
|
def build_skill_entry(skill_md_path: Path) -> dict:
    """Build a registry entry from a SKILL.md file.

    Merges auto-detected capabilities (keyword scan of the description)
    with any explicit ``capabilities`` declared in the frontmatter,
    which may be a YAML list or a comma-separated string.
    """
    skill_dir = skill_md_path.parent
    meta = parse_yaml_frontmatter(skill_md_path)
    description = meta.get("description", "")

    # Support explicit capabilities in frontmatter. `or []` also covers
    # an explicit empty `capabilities:` key, which YAML parses to None
    # and which previously crashed the list concatenation below.
    explicit_caps = meta.get("capabilities") or []
    if isinstance(explicit_caps, str):
        # Comma-separated string form; drop empty fragments ("a,,b").
        explicit_caps = [c.strip() for c in explicit_caps.split(",") if c.strip()]

    auto_caps = extract_capabilities(description)
    all_caps = sorted(set(auto_caps) | set(explicit_caps))

    return {
        "name": meta.get("name", skill_dir.name),
        "description": description,
        "version": meta.get("version", ""),
        "location": str(skill_dir),
        "skill_md": str(skill_md_path),
        "registered": is_registered(skill_dir),
        "has_scripts": (skill_dir / "scripts").exists(),
        "has_references": (skill_dir / "references").exists(),
        "has_data": (skill_dir / "data").exists(),
        "capabilities": all_caps,
        "triggers": extract_triggers(description),
        "language": detect_language(skill_dir),
        "status": assess_status(skill_dir),
        # Local-time mtime of SKILL.md itself (not of scripts/refs).
        "last_modified": datetime.fromtimestamp(
            skill_md_path.stat().st_mtime
        ).isoformat(),
    }
|
|
|
|
|
|
def scan(force: bool = False) -> dict:
    """
    Main scan function.

    With hash caching:
    1. Find all SKILL.md files
    2. Compare MD5 hashes with stored values
    3. Only re-parse files that changed, were added, or removed
    4. Update registry incrementally

    Returns the (possibly freshly persisted) registry dict. When nothing
    changed, the previously saved registry is returned untouched and no
    files are written.
    """
    current_files = find_skill_files()
    # Keyed by the string path so entries can be matched against the
    # stored hashes and the persisted registry (both use string keys).
    current_paths = {str(f): f for f in current_files}

    stored_hashes = load_hashes()
    registry = load_registry()

    # Build lookup of existing registry entries by skill_md path.
    # An entry without a "skill_md" field lands under the "" sentinel key,
    # which the removal pass below deliberately leaves alone.
    existing_by_path = {}
    for entry in registry.get("skills", []):
        existing_by_path[entry.get("skill_md", "")] = entry

    # Compute current hashes
    new_hashes = {}
    changed = False

    for path_str, path_obj in current_paths.items():
        current_hash = md5_file(path_obj)
        new_hashes[path_str] = current_hash

        if force or path_str not in stored_hashes or stored_hashes[path_str] != current_hash:
            # New or modified - rebuild entry
            entry = build_skill_entry(path_obj)
            existing_by_path[path_str] = entry
            changed = True

    # Detect removed skills (present in the old registry, gone on disk).
    # Iterate over a list copy because we delete while walking the dict.
    for old_path in list(existing_by_path.keys()):
        if old_path not in current_paths and old_path != "":
            del existing_by_path[old_path]
            changed = True

    # Check if file set changed (additions/removals); this catches the
    # case where the hash store and the file set diverged even though no
    # individual file's content differed.
    if set(new_hashes.keys()) != set(stored_hashes.keys()):
        changed = True

    # Deduplicate by skill name (case-insensitive).
    # When the same skill exists in both skills/ and .claude/skills/,
    # prefer the primary location (skills/) over the registered copy.
    # Also rebuild when registry.json is missing, even if hashes matched.
    if changed or not REGISTRY_PATH.exists():
        by_name = {}
        for entry in existing_by_path.values():
            name = entry.get("name", "").lower()
            if not name:
                # Nameless entries cannot be deduplicated; drop them.
                continue
            if name not in by_name:
                by_name[name] = entry
            else:
                # Prefer the version NOT in .claude/skills/ (the primary source)
                existing = by_name[name]
                existing_is_registered = existing.get("registered", False)
                new_is_registered = entry.get("registered", False)
                if existing_is_registered and not new_is_registered:
                    by_name[name] = entry
                # If both are primary or both registered, keep first found

        registry["skills"] = sorted(by_name.values(), key=lambda s: s.get("name", ""))
        save_registry(registry)
        save_hashes(new_hashes)
        return registry
    else:
        # Nothing changed, return existing
        return registry
|
|
|
|
|
|
def print_status(registry: dict):
    """Print a human-readable status table for *registry*.

    Shows one row per skill (name, status, language, registration,
    capabilities), then flags unregistered and incomplete skills.
    """
    skills = registry.get("skills", [])

    if not skills:
        print("No skills found in the ecosystem.")
        return

    bar = "=" * 80
    print(f"\n{bar}")
    print(" Agent Orchestrator - Skill Registry Status")
    print(f" Scanned at: {registry.get('generated_at', 'N/A')}")
    print(f" Root: {registry.get('skills_root', 'N/A')}")
    print(f"{bar}\n")

    # Shared row layout keeps header, separator, and data rows aligned.
    row = " {:<22} {:<12} {:<10} {:<12} {}"
    print(row.format("Name", "Status", "Lang", "Registered", "Capabilities"))
    print(row.format("-" * 22, "-" * 12, "-" * 10, "-" * 12, "-" * 30))

    for entry in sorted(skills, key=lambda x: x.get("name", "")):
        caps = ", ".join(entry.get("capabilities", []))[:30]
        print(row.format(
            entry.get("name", "?")[:20],
            entry.get("status", "?"),
            entry.get("language", "none"),
            "Yes" if entry.get("registered") else "No",
            caps,
        ))

    print(f"\n Total: {len(skills)} skills")

    # Recommendations
    unregistered = [s for s in skills if not s.get("registered")]
    incomplete = [s for s in skills if s.get("status") == "incomplete"]

    if unregistered:
        print(f"\n [!] {len(unregistered)} skill(s) not registered in .claude/skills/:")
        for s in unregistered:
            print(f" - {s['name']} ({s['location']})")

    if incomplete:
        print(f"\n [!] {len(incomplete)} skill(s) with incomplete status:")
        for s in incomplete:
            print(f" - {s['name']} ({s['location']})")

    print()
|
|
|
|
|
|
# ── CLI Entry Point ────────────────────────────────────────────────────────
|
|
|
|
def main():
    """CLI entry point.

    Flags (anywhere in argv):
      --force   re-parse every SKILL.md regardless of cached hashes
      --status  print a human-readable table instead of the JSON summary
    """
    args = sys.argv
    registry = scan(force="--force" in args)

    if "--status" in args:
        print_status(registry)
        return

    # Default: output JSON summary for Claude to parse
    skills = registry.get("skills", [])
    active = sum(1 for s in skills if s.get("status") == "active")
    incomplete = sum(1 for s in skills if s.get("status") == "incomplete")
    summary = {
        "total": len(skills),
        "active": active,
        "incomplete": incomplete,
        "skills": [
            {
                "name": s.get("name"),
                "status": s.get("status"),
                "capabilities": s.get("capabilities", []),
            }
            for s in skills
        ],
    }
    print(json.dumps(summary, indent=2, ensure_ascii=False))
|
|
|
|
|
|
# Script entry point: `python scan_registry.py [--force] [--status]`.
if __name__ == "__main__":
    main()
|