const fs = require('fs'); const path = require('path'); const { listSkillIdsRecursive, readSkill, tokenize, unique, } = require('../lib/skill-utils'); const ROOT = path.resolve(__dirname, '..'); const SKILLS_DIR = path.join(ROOT, 'skills'); const STOPWORDS = new Set([ 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'from', 'has', 'have', 'in', 'into', 'is', 'it', 'its', 'of', 'on', 'or', 'our', 'out', 'over', 'that', 'the', 'their', 'they', 'this', 'to', 'use', 'when', 'with', 'you', 'your', 'will', 'can', 'if', 'not', 'only', 'also', 'more', 'best', 'practice', 'practices', 'expert', 'specialist', 'focused', 'focus', 'master', 'modern', 'advanced', 'comprehensive', 'production', 'production-ready', 'ready', 'build', 'create', 'deliver', 'design', 'implement', 'implementation', 'strategy', 'strategies', 'patterns', 'pattern', 'workflow', 'workflows', 'guide', 'template', 'templates', 'tool', 'tools', 'project', 'projects', 'support', 'manage', 'management', 'system', 'systems', 'services', 'service', 'across', 'end', 'end-to-end', 'using', 'based', 'ensure', 'ensure', 'help', 'needs', 'need', 'focuses', 'handles', 'builds', 'make', ]); const TAG_STOPWORDS = new Set([ 'pro', 'expert', 'patterns', 'pattern', 'workflow', 'workflows', 'templates', 'template', 'toolkit', 'tools', 'tool', 'project', 'projects', 'guide', 'management', 'engineer', 'architect', 'developer', 'specialist', 'assistant', 'analysis', 'review', 'reviewer', 'automation', 'orchestration', 'scaffold', 'scaffolding', 'implementation', 'strategy', 'context', 'management', 'feature', 'features', 'smart', 'system', 'systems', 'design', 'development', 'development', 'test', 'testing', 'workflow', ]); const CATEGORY_RULES = [ { name: 'security', keywords: [ 'security', 'sast', 'compliance', 'privacy', 'threat', 'vulnerability', 'owasp', 'pci', 'gdpr', 'secrets', 'risk', 'malware', 'forensics', 'attack', 'incident', 'auth', 'mtls', 'zero', 'trust', ], }, { name: 'infrastructure', keywords: [ 'kubernetes', 'k8s', 'helm', 'terraform', 'cloud', 'network', 'devops', 'gitops', 'prometheus', 'grafana', 'observability', 'monitoring', 'logging', 'tracing', 'deployment', 'istio', 'linkerd', 'service', 'mesh', 'slo', 'sre', 'oncall', 'incident', 'pipeline', 'cicd', 'ci', 'cd', 'kafka', ], }, { name: 'data-ai', keywords: [ 'data', 'database', 'db', 'sql', 'postgres', 'mysql', 'analytics', 'etl', 'warehouse', 'dbt', 'ml', 'ai', 'llm', 'rag', 'vector', 'embedding', 'spark', 'airflow', 'cdc', 'pipeline', ], }, { name: 'development', keywords: [ 'python', 'javascript', 'typescript', 'java', 'golang', 'go', 'rust', 'csharp', 'dotnet', 'php', 'ruby', 'node', 'react', 'frontend', 'backend', 'mobile', 'ios', 'android', 'flutter', 'fastapi', 'django', 'nextjs', 'vue', 'api', ], }, { name: 'architecture', keywords: [ 'architecture', 'c4', 'microservices', 'event', 'cqrs', 'saga', 'domain', 'ddd', 'patterns', 'decision', 'adr', ], }, { name: 'testing', keywords: ['testing', 'tdd', 'unit', 'e2e', 'qa', 'test'], }, { name: 'business', keywords: [ 'business', 'market', 'sales', 'finance', 'startup', 'legal', 'hr', 'product', 'customer', 'seo', 'marketing', 'kpi', 'contract', 'employment', ], }, { name: 'workflow', keywords: ['workflow', 'orchestration', 'conductor', 'automation', 'process', 'collaboration'], }, ]; const BUNDLE_RULES = { 'core-dev': { description: 'Core development skills across languages, frameworks, and backend/frontend fundamentals.', keywords: [ 'python', 'javascript', 'typescript', 'go', 'golang', 'rust', 'java', 'node', 'frontend', 'backend', 'react', 'fastapi', 'django', 'nextjs', 'api', 'mobile', 'ios', 'android', 'flutter', 'php', 'ruby', ], }, 'security-core': { description: 'Security, privacy, and compliance essentials.', keywords: [ 'security', 'sast', 'compliance', 'threat', 'risk', 'privacy', 'secrets', 'owasp', 'gdpr', 'pci', 'vulnerability', 'auth', ], }, 'k8s-core': { description: 'Kubernetes and service mesh essentials.', keywords: ['kubernetes', 'k8s', 'helm', 'istio', 'linkerd', 'service', 'mesh'], }, 'data-core': { description: 'Data engineering and analytics foundations.', keywords: [ 'data', 'database', 'sql', 'dbt', 'airflow', 'spark', 'analytics', 'etl', 'warehouse', 'postgres', 'mysql', 'kafka', ], }, 'ops-core': { description: 'Operations, observability, and delivery pipelines.', keywords: [ 'observability', 'monitoring', 'logging', 'tracing', 'prometheus', 'grafana', 'devops', 'gitops', 'deployment', 'cicd', 'pipeline', 'slo', 'sre', 'incident', ], }, }; const CURATED_COMMON = [ 'bash-pro', 'python-pro', 'javascript-pro', 'typescript-pro', 'golang-pro', 'rust-pro', 'java-pro', 'frontend-developer', 'backend-architect', 'nodejs-backend-patterns', 'fastapi-pro', 'api-design-principles', 'sql-pro', 'database-architect', 'kubernetes-architect', 'terraform-specialist', 'observability-engineer', 'security-auditor', 'sast-configuration', 'gitops-workflow', ]; function normalizeTokens(tokens) { return unique(tokens.map(token => token.toLowerCase())).filter(Boolean); } function deriveTags(skill) { let tags = Array.isArray(skill.tags) ? skill.tags : []; tags = tags.map(tag => tag.toLowerCase()).filter(Boolean); if (!tags.length) { tags = skill.id .split('-') .map(tag => tag.toLowerCase()) .filter(tag => tag && !TAG_STOPWORDS.has(tag)); } return normalizeTokens(tags); } function detectCategory(skill, tags) { const haystack = normalizeTokens([ ...tags, ...tokenize(skill.name), ...tokenize(skill.description), ]); const haystackSet = new Set(haystack); for (const rule of CATEGORY_RULES) { for (const keyword of rule.keywords) { if (haystackSet.has(keyword)) { return rule.name; } } } return 'general'; } function buildTriggers(skill, tags) { const tokens = tokenize(`${skill.name} ${skill.description}`) .filter(token => token.length >= 2 && !STOPWORDS.has(token)); return unique([...tags, ...tokens]).slice(0, 12); } function buildAliases(skills) { const existingIds = new Set(skills.map(skill => skill.id)); const aliases = {}; const used = new Set(); for (const skill of skills) { if (skill.name && skill.name !== skill.id) { const alias = skill.name.toLowerCase(); if (!existingIds.has(alias) && !used.has(alias)) { aliases[alias] = skill.id; used.add(alias); } } const tokens = skill.id.split('-').filter(Boolean); if (skill.id.length < 28 || tokens.length < 4) continue; const deduped = []; const tokenSeen = new Set(); for (const token of tokens) { if (tokenSeen.has(token)) continue; tokenSeen.add(token); deduped.push(token); } const aliasTokens = deduped.length > 3 ? [deduped[0], deduped[1], deduped[deduped.length - 1]] : deduped; const alias = unique(aliasTokens).join('-'); if (!alias || alias === skill.id) continue; if (existingIds.has(alias) || used.has(alias)) continue; aliases[alias] = skill.id; used.add(alias); } return aliases; } function buildBundles(skills) { const bundles = {}; const skillTokens = new Map(); for (const skill of skills) { const tokens = normalizeTokens([ ...skill.tags, ...tokenize(skill.name), ...tokenize(skill.description), ]); skillTokens.set(skill.id, new Set(tokens)); } for (const [bundleName, rule] of Object.entries(BUNDLE_RULES)) { const bundleSkills = []; const keywords = rule.keywords.map(keyword => keyword.toLowerCase()); for (const skill of skills) { const tokenSet = skillTokens.get(skill.id) || new Set(); if (keywords.some(keyword => tokenSet.has(keyword))) { bundleSkills.push(skill.id); } } bundles[bundleName] = { description: rule.description, skills: bundleSkills.sort(), }; } const common = CURATED_COMMON.filter(skillId => skillTokens.has(skillId)); return { bundles, common }; } function truncate(value, limit) { if (!value || value.length <= limit) return value || ''; return `${value.slice(0, limit - 3)}...`; } function renderCatalogMarkdown(catalog) { const lines = []; lines.push('# Skill Catalog'); lines.push(''); lines.push(`Generated at: ${catalog.generatedAt}`); lines.push(''); lines.push(`Total skills: ${catalog.total}`); lines.push(''); const categories = Array.from(new Set(catalog.skills.map(skill => skill.category))).sort(); for (const category of categories) { const grouped = catalog.skills.filter(skill => skill.category === category); lines.push(`## ${category} (${grouped.length})`); lines.push(''); lines.push('| Skill | Description | Tags | Triggers |'); lines.push('| --- | --- | --- | --- |'); for (const skill of grouped) { const description = truncate(skill.description, 160).replace(/\|/g, '\\|'); const tags = skill.tags.join(', '); const triggers = skill.triggers.join(', '); lines.push(`| \`${skill.id}\` | ${description} | ${tags} | ${triggers} |`); } lines.push(''); } return lines.join('\n'); } function buildCatalog() { const skillRelPaths = listSkillIdsRecursive(SKILLS_DIR); const skills = skillRelPaths.map(relPath => readSkill(SKILLS_DIR, relPath)); const catalogSkills = []; for (const skill of skills) { const tags = deriveTags(skill); const category = detectCategory(skill, tags); const triggers = buildTriggers(skill, tags); catalogSkills.push({ id: skill.id, name: skill.name, description: skill.description, category, tags, triggers, path: path.relative(ROOT, skill.path), }); } const catalog = { generatedAt: new Date().toISOString(), total: catalogSkills.length, skills: catalogSkills.sort((a, b) => a.id.localeCompare(b.id)), }; const aliases = buildAliases(catalog.skills); const bundleData = buildBundles(catalog.skills); const catalogPath = path.join(ROOT, 'data', 'catalog.json'); const catalogMarkdownPath = path.join(ROOT, 'CATALOG.md'); const bundlesPath = path.join(ROOT, 'data', 'bundles.json'); const aliasesPath = path.join(ROOT, 'data', 'aliases.json'); fs.writeFileSync(catalogPath, JSON.stringify(catalog, null, 2)); fs.writeFileSync(catalogMarkdownPath, renderCatalogMarkdown(catalog)); fs.writeFileSync( bundlesPath, JSON.stringify({ generatedAt: catalog.generatedAt, ...bundleData }, null, 2), ); fs.writeFileSync( aliasesPath, JSON.stringify({ generatedAt: catalog.generatedAt, aliases }, null, 2), ); return catalog; } if (require.main === module) { const catalog = buildCatalog(); console.log(`Generated catalog for ${catalog.total} skills.`); } module.exports = { buildCatalog, };