refactor: flatten Microsoft skills from nested to flat directory structure
Rewrote sync_microsoft_skills.py (v4) to use each SKILL.md's frontmatter 'name' field as the flat directory name under skills/, replacing the nested skills/official/microsoft/<lang>/<category>/<service>/ hierarchy. This fixes CI failures caused by the indexing, validation, and catalog scripts expecting skills/<id>/SKILL.md (depth 1).

Changes:
- Rewrite scripts/sync_microsoft_skills.py for flat output with collision detection
- Update scripts/tests/inspect_microsoft_repo.py for flat name mapping
- Update scripts/tests/test_comprehensive_coverage.py for name uniqueness checks
- Delete skills/official/ nested directory
- Add 129 Microsoft skills as flat directories (e.g. skills/azure-mgmt-botservice-dotnet/)
- Move attribution files to docs/ (LICENSE-MICROSOFT, microsoft-skills-attribution.json)
- Rebuild skills_index.json, CATALOG.md, README.md (845 total skills)
This commit is contained in:
@@ -1,161 +1,454 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const {
|
||||
listSkillIdsRecursive,
|
||||
readSkill,
|
||||
tokenize,
|
||||
unique,
|
||||
} = require('../lib/skill-utils');
|
||||
} = require("../lib/skill-utils");
|
||||
|
||||
const ROOT = path.resolve(__dirname, '..');
|
||||
const SKILLS_DIR = path.join(ROOT, 'skills');
|
||||
const ROOT = path.resolve(__dirname, "..");
|
||||
const SKILLS_DIR = path.join(ROOT, "skills");
|
||||
|
||||
const STOPWORDS = new Set([
|
||||
'a', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'from', 'has', 'have', 'in', 'into',
|
||||
'is', 'it', 'its', 'of', 'on', 'or', 'our', 'out', 'over', 'that', 'the', 'their', 'they', 'this',
|
||||
'to', 'use', 'when', 'with', 'you', 'your', 'will', 'can', 'if', 'not', 'only', 'also', 'more',
|
||||
'best', 'practice', 'practices', 'expert', 'specialist', 'focused', 'focus', 'master', 'modern',
|
||||
'advanced', 'comprehensive', 'production', 'production-ready', 'ready', 'build', 'create', 'deliver',
|
||||
'design', 'implement', 'implementation', 'strategy', 'strategies', 'patterns', 'pattern', 'workflow',
|
||||
'workflows', 'guide', 'template', 'templates', 'tool', 'tools', 'project', 'projects', 'support',
|
||||
'manage', 'management', 'system', 'systems', 'services', 'service', 'across', 'end', 'end-to-end',
|
||||
'using', 'based', 'ensure', 'ensure', 'help', 'needs', 'need', 'focuses', 'handles', 'builds', 'make',
|
||||
"a",
|
||||
"an",
|
||||
"and",
|
||||
"are",
|
||||
"as",
|
||||
"at",
|
||||
"be",
|
||||
"but",
|
||||
"by",
|
||||
"for",
|
||||
"from",
|
||||
"has",
|
||||
"have",
|
||||
"in",
|
||||
"into",
|
||||
"is",
|
||||
"it",
|
||||
"its",
|
||||
"of",
|
||||
"on",
|
||||
"or",
|
||||
"our",
|
||||
"out",
|
||||
"over",
|
||||
"that",
|
||||
"the",
|
||||
"their",
|
||||
"they",
|
||||
"this",
|
||||
"to",
|
||||
"use",
|
||||
"when",
|
||||
"with",
|
||||
"you",
|
||||
"your",
|
||||
"will",
|
||||
"can",
|
||||
"if",
|
||||
"not",
|
||||
"only",
|
||||
"also",
|
||||
"more",
|
||||
"best",
|
||||
"practice",
|
||||
"practices",
|
||||
"expert",
|
||||
"specialist",
|
||||
"focused",
|
||||
"focus",
|
||||
"master",
|
||||
"modern",
|
||||
"advanced",
|
||||
"comprehensive",
|
||||
"production",
|
||||
"production-ready",
|
||||
"ready",
|
||||
"build",
|
||||
"create",
|
||||
"deliver",
|
||||
"design",
|
||||
"implement",
|
||||
"implementation",
|
||||
"strategy",
|
||||
"strategies",
|
||||
"patterns",
|
||||
"pattern",
|
||||
"workflow",
|
||||
"workflows",
|
||||
"guide",
|
||||
"template",
|
||||
"templates",
|
||||
"tool",
|
||||
"tools",
|
||||
"project",
|
||||
"projects",
|
||||
"support",
|
||||
"manage",
|
||||
"management",
|
||||
"system",
|
||||
"systems",
|
||||
"services",
|
||||
"service",
|
||||
"across",
|
||||
"end",
|
||||
"end-to-end",
|
||||
"using",
|
||||
"based",
|
||||
"ensure",
|
||||
"ensure",
|
||||
"help",
|
||||
"needs",
|
||||
"need",
|
||||
"focuses",
|
||||
"handles",
|
||||
"builds",
|
||||
"make",
|
||||
]);
|
||||
|
||||
const TAG_STOPWORDS = new Set([
|
||||
'pro', 'expert', 'patterns', 'pattern', 'workflow', 'workflows', 'templates', 'template', 'toolkit',
|
||||
'tools', 'tool', 'project', 'projects', 'guide', 'management', 'engineer', 'architect', 'developer',
|
||||
'specialist', 'assistant', 'analysis', 'review', 'reviewer', 'automation', 'orchestration', 'scaffold',
|
||||
'scaffolding', 'implementation', 'strategy', 'context', 'management', 'feature', 'features', 'smart',
|
||||
'system', 'systems', 'design', 'development', 'development', 'test', 'testing', 'workflow',
|
||||
"pro",
|
||||
"expert",
|
||||
"patterns",
|
||||
"pattern",
|
||||
"workflow",
|
||||
"workflows",
|
||||
"templates",
|
||||
"template",
|
||||
"toolkit",
|
||||
"tools",
|
||||
"tool",
|
||||
"project",
|
||||
"projects",
|
||||
"guide",
|
||||
"management",
|
||||
"engineer",
|
||||
"architect",
|
||||
"developer",
|
||||
"specialist",
|
||||
"assistant",
|
||||
"analysis",
|
||||
"review",
|
||||
"reviewer",
|
||||
"automation",
|
||||
"orchestration",
|
||||
"scaffold",
|
||||
"scaffolding",
|
||||
"implementation",
|
||||
"strategy",
|
||||
"context",
|
||||
"management",
|
||||
"feature",
|
||||
"features",
|
||||
"smart",
|
||||
"system",
|
||||
"systems",
|
||||
"design",
|
||||
"development",
|
||||
"development",
|
||||
"test",
|
||||
"testing",
|
||||
"workflow",
|
||||
]);
|
||||
|
||||
const CATEGORY_RULES = [
|
||||
{
|
||||
name: 'security',
|
||||
name: "security",
|
||||
keywords: [
|
||||
'security', 'sast', 'compliance', 'privacy', 'threat', 'vulnerability', 'owasp', 'pci', 'gdpr',
|
||||
'secrets', 'risk', 'malware', 'forensics', 'attack', 'incident', 'auth', 'mtls', 'zero', 'trust',
|
||||
"security",
|
||||
"sast",
|
||||
"compliance",
|
||||
"privacy",
|
||||
"threat",
|
||||
"vulnerability",
|
||||
"owasp",
|
||||
"pci",
|
||||
"gdpr",
|
||||
"secrets",
|
||||
"risk",
|
||||
"malware",
|
||||
"forensics",
|
||||
"attack",
|
||||
"incident",
|
||||
"auth",
|
||||
"mtls",
|
||||
"zero",
|
||||
"trust",
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'infrastructure',
|
||||
name: "infrastructure",
|
||||
keywords: [
|
||||
'kubernetes', 'k8s', 'helm', 'terraform', 'cloud', 'network', 'devops', 'gitops', 'prometheus',
|
||||
'grafana', 'observability', 'monitoring', 'logging', 'tracing', 'deployment', 'istio', 'linkerd',
|
||||
'service', 'mesh', 'slo', 'sre', 'oncall', 'incident', 'pipeline', 'cicd', 'ci', 'cd', 'kafka',
|
||||
"kubernetes",
|
||||
"k8s",
|
||||
"helm",
|
||||
"terraform",
|
||||
"cloud",
|
||||
"network",
|
||||
"devops",
|
||||
"gitops",
|
||||
"prometheus",
|
||||
"grafana",
|
||||
"observability",
|
||||
"monitoring",
|
||||
"logging",
|
||||
"tracing",
|
||||
"deployment",
|
||||
"istio",
|
||||
"linkerd",
|
||||
"service",
|
||||
"mesh",
|
||||
"slo",
|
||||
"sre",
|
||||
"oncall",
|
||||
"incident",
|
||||
"pipeline",
|
||||
"cicd",
|
||||
"ci",
|
||||
"cd",
|
||||
"kafka",
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'data-ai',
|
||||
name: "data-ai",
|
||||
keywords: [
|
||||
'data', 'database', 'db', 'sql', 'postgres', 'mysql', 'analytics', 'etl', 'warehouse', 'dbt',
|
||||
'ml', 'ai', 'llm', 'rag', 'vector', 'embedding', 'spark', 'airflow', 'cdc', 'pipeline',
|
||||
"data",
|
||||
"database",
|
||||
"db",
|
||||
"sql",
|
||||
"postgres",
|
||||
"mysql",
|
||||
"analytics",
|
||||
"etl",
|
||||
"warehouse",
|
||||
"dbt",
|
||||
"ml",
|
||||
"ai",
|
||||
"llm",
|
||||
"rag",
|
||||
"vector",
|
||||
"embedding",
|
||||
"spark",
|
||||
"airflow",
|
||||
"cdc",
|
||||
"pipeline",
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'development',
|
||||
name: "development",
|
||||
keywords: [
|
||||
'python', 'javascript', 'typescript', 'java', 'golang', 'go', 'rust', 'csharp', 'dotnet', 'php',
|
||||
'ruby', 'node', 'react', 'frontend', 'backend', 'mobile', 'ios', 'android', 'flutter', 'fastapi',
|
||||
'django', 'nextjs', 'vue', 'api',
|
||||
"python",
|
||||
"javascript",
|
||||
"typescript",
|
||||
"java",
|
||||
"golang",
|
||||
"go",
|
||||
"rust",
|
||||
"csharp",
|
||||
"dotnet",
|
||||
"php",
|
||||
"ruby",
|
||||
"node",
|
||||
"react",
|
||||
"frontend",
|
||||
"backend",
|
||||
"mobile",
|
||||
"ios",
|
||||
"android",
|
||||
"flutter",
|
||||
"fastapi",
|
||||
"django",
|
||||
"nextjs",
|
||||
"vue",
|
||||
"api",
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'architecture',
|
||||
name: "architecture",
|
||||
keywords: [
|
||||
'architecture', 'c4', 'microservices', 'event', 'cqrs', 'saga', 'domain', 'ddd', 'patterns',
|
||||
'decision', 'adr',
|
||||
"architecture",
|
||||
"c4",
|
||||
"microservices",
|
||||
"event",
|
||||
"cqrs",
|
||||
"saga",
|
||||
"domain",
|
||||
"ddd",
|
||||
"patterns",
|
||||
"decision",
|
||||
"adr",
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'testing',
|
||||
keywords: ['testing', 'tdd', 'unit', 'e2e', 'qa', 'test'],
|
||||
name: "testing",
|
||||
keywords: ["testing", "tdd", "unit", "e2e", "qa", "test"],
|
||||
},
|
||||
{
|
||||
name: 'business',
|
||||
name: "business",
|
||||
keywords: [
|
||||
'business', 'market', 'sales', 'finance', 'startup', 'legal', 'hr', 'product', 'customer', 'seo',
|
||||
'marketing', 'kpi', 'contract', 'employment',
|
||||
"business",
|
||||
"market",
|
||||
"sales",
|
||||
"finance",
|
||||
"startup",
|
||||
"legal",
|
||||
"hr",
|
||||
"product",
|
||||
"customer",
|
||||
"seo",
|
||||
"marketing",
|
||||
"kpi",
|
||||
"contract",
|
||||
"employment",
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'workflow',
|
||||
keywords: ['workflow', 'orchestration', 'conductor', 'automation', 'process', 'collaboration'],
|
||||
name: "workflow",
|
||||
keywords: [
|
||||
"workflow",
|
||||
"orchestration",
|
||||
"conductor",
|
||||
"automation",
|
||||
"process",
|
||||
"collaboration",
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
const BUNDLE_RULES = {
|
||||
'core-dev': {
|
||||
description: 'Core development skills across languages, frameworks, and backend/frontend fundamentals.',
|
||||
"core-dev": {
|
||||
description:
|
||||
"Core development skills across languages, frameworks, and backend/frontend fundamentals.",
|
||||
keywords: [
|
||||
'python', 'javascript', 'typescript', 'go', 'golang', 'rust', 'java', 'node', 'frontend', 'backend',
|
||||
'react', 'fastapi', 'django', 'nextjs', 'api', 'mobile', 'ios', 'android', 'flutter', 'php', 'ruby',
|
||||
"python",
|
||||
"javascript",
|
||||
"typescript",
|
||||
"go",
|
||||
"golang",
|
||||
"rust",
|
||||
"java",
|
||||
"node",
|
||||
"frontend",
|
||||
"backend",
|
||||
"react",
|
||||
"fastapi",
|
||||
"django",
|
||||
"nextjs",
|
||||
"api",
|
||||
"mobile",
|
||||
"ios",
|
||||
"android",
|
||||
"flutter",
|
||||
"php",
|
||||
"ruby",
|
||||
],
|
||||
},
|
||||
'security-core': {
|
||||
description: 'Security, privacy, and compliance essentials.',
|
||||
"security-core": {
|
||||
description: "Security, privacy, and compliance essentials.",
|
||||
keywords: [
|
||||
'security', 'sast', 'compliance', 'threat', 'risk', 'privacy', 'secrets', 'owasp', 'gdpr', 'pci',
|
||||
'vulnerability', 'auth',
|
||||
"security",
|
||||
"sast",
|
||||
"compliance",
|
||||
"threat",
|
||||
"risk",
|
||||
"privacy",
|
||||
"secrets",
|
||||
"owasp",
|
||||
"gdpr",
|
||||
"pci",
|
||||
"vulnerability",
|
||||
"auth",
|
||||
],
|
||||
},
|
||||
'k8s-core': {
|
||||
description: 'Kubernetes and service mesh essentials.',
|
||||
keywords: ['kubernetes', 'k8s', 'helm', 'istio', 'linkerd', 'service', 'mesh'],
|
||||
},
|
||||
'data-core': {
|
||||
description: 'Data engineering and analytics foundations.',
|
||||
"k8s-core": {
|
||||
description: "Kubernetes and service mesh essentials.",
|
||||
keywords: [
|
||||
'data', 'database', 'sql', 'dbt', 'airflow', 'spark', 'analytics', 'etl', 'warehouse', 'postgres',
|
||||
'mysql', 'kafka',
|
||||
"kubernetes",
|
||||
"k8s",
|
||||
"helm",
|
||||
"istio",
|
||||
"linkerd",
|
||||
"service",
|
||||
"mesh",
|
||||
],
|
||||
},
|
||||
'ops-core': {
|
||||
description: 'Operations, observability, and delivery pipelines.',
|
||||
"data-core": {
|
||||
description: "Data engineering and analytics foundations.",
|
||||
keywords: [
|
||||
'observability', 'monitoring', 'logging', 'tracing', 'prometheus', 'grafana', 'devops', 'gitops',
|
||||
'deployment', 'cicd', 'pipeline', 'slo', 'sre', 'incident',
|
||||
"data",
|
||||
"database",
|
||||
"sql",
|
||||
"dbt",
|
||||
"airflow",
|
||||
"spark",
|
||||
"analytics",
|
||||
"etl",
|
||||
"warehouse",
|
||||
"postgres",
|
||||
"mysql",
|
||||
"kafka",
|
||||
],
|
||||
},
|
||||
"ops-core": {
|
||||
description: "Operations, observability, and delivery pipelines.",
|
||||
keywords: [
|
||||
"observability",
|
||||
"monitoring",
|
||||
"logging",
|
||||
"tracing",
|
||||
"prometheus",
|
||||
"grafana",
|
||||
"devops",
|
||||
"gitops",
|
||||
"deployment",
|
||||
"cicd",
|
||||
"pipeline",
|
||||
"slo",
|
||||
"sre",
|
||||
"incident",
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
const CURATED_COMMON = [
|
||||
'bash-pro',
|
||||
'python-pro',
|
||||
'javascript-pro',
|
||||
'typescript-pro',
|
||||
'golang-pro',
|
||||
'rust-pro',
|
||||
'java-pro',
|
||||
'frontend-developer',
|
||||
'backend-architect',
|
||||
'nodejs-backend-patterns',
|
||||
'fastapi-pro',
|
||||
'api-design-principles',
|
||||
'sql-pro',
|
||||
'database-architect',
|
||||
'kubernetes-architect',
|
||||
'terraform-specialist',
|
||||
'observability-engineer',
|
||||
'security-auditor',
|
||||
'sast-configuration',
|
||||
'gitops-workflow',
|
||||
"bash-pro",
|
||||
"python-pro",
|
||||
"javascript-pro",
|
||||
"typescript-pro",
|
||||
"golang-pro",
|
||||
"rust-pro",
|
||||
"java-pro",
|
||||
"frontend-developer",
|
||||
"backend-architect",
|
||||
"nodejs-backend-patterns",
|
||||
"fastapi-pro",
|
||||
"api-design-principles",
|
||||
"sql-pro",
|
||||
"database-architect",
|
||||
"kubernetes-architect",
|
||||
"terraform-specialist",
|
||||
"observability-engineer",
|
||||
"security-auditor",
|
||||
"sast-configuration",
|
||||
"gitops-workflow",
|
||||
];
|
||||
|
||||
function normalizeTokens(tokens) {
|
||||
return unique(tokens.map(token => token.toLowerCase())).filter(Boolean);
|
||||
return unique(tokens.map((token) => token.toLowerCase())).filter(Boolean);
|
||||
}
|
||||
|
||||
function deriveTags(skill) {
|
||||
let tags = Array.isArray(skill.tags) ? skill.tags : [];
|
||||
tags = tags.map(tag => tag.toLowerCase()).filter(Boolean);
|
||||
tags = tags.map((tag) => tag.toLowerCase()).filter(Boolean);
|
||||
|
||||
if (!tags.length) {
|
||||
tags = skill.id
|
||||
.split('-')
|
||||
.map(tag => tag.toLowerCase())
|
||||
.filter(tag => tag && !TAG_STOPWORDS.has(tag));
|
||||
.split("-")
|
||||
.map((tag) => tag.toLowerCase())
|
||||
.filter((tag) => tag && !TAG_STOPWORDS.has(tag));
|
||||
}
|
||||
|
||||
return normalizeTokens(tags);
|
||||
@@ -177,17 +470,18 @@ function detectCategory(skill, tags) {
|
||||
}
|
||||
}
|
||||
|
||||
return 'general';
|
||||
return "general";
|
||||
}
|
||||
|
||||
function buildTriggers(skill, tags) {
|
||||
const tokens = tokenize(`${skill.name} ${skill.description}`)
|
||||
.filter(token => token.length >= 2 && !STOPWORDS.has(token));
|
||||
const tokens = tokenize(`${skill.name} ${skill.description}`).filter(
|
||||
(token) => token.length >= 2 && !STOPWORDS.has(token),
|
||||
);
|
||||
return unique([...tags, ...tokens]).slice(0, 12);
|
||||
}
|
||||
|
||||
function buildAliases(skills) {
|
||||
const existingIds = new Set(skills.map(skill => skill.id));
|
||||
const existingIds = new Set(skills.map((skill) => skill.id));
|
||||
const aliases = {};
|
||||
const used = new Set();
|
||||
|
||||
@@ -200,7 +494,7 @@ function buildAliases(skills) {
|
||||
}
|
||||
}
|
||||
|
||||
const tokens = skill.id.split('-').filter(Boolean);
|
||||
const tokens = skill.id.split("-").filter(Boolean);
|
||||
if (skill.id.length < 28 || tokens.length < 4) continue;
|
||||
|
||||
const deduped = [];
|
||||
@@ -211,10 +505,11 @@ function buildAliases(skills) {
|
||||
deduped.push(token);
|
||||
}
|
||||
|
||||
const aliasTokens = deduped.length > 3
|
||||
? [deduped[0], deduped[1], deduped[deduped.length - 1]]
|
||||
: deduped;
|
||||
const alias = unique(aliasTokens).join('-');
|
||||
const aliasTokens =
|
||||
deduped.length > 3
|
||||
? [deduped[0], deduped[1], deduped[deduped.length - 1]]
|
||||
: deduped;
|
||||
const alias = unique(aliasTokens).join("-");
|
||||
|
||||
if (!alias || alias === skill.id) continue;
|
||||
if (existingIds.has(alias) || used.has(alias)) continue;
|
||||
@@ -241,11 +536,11 @@ function buildBundles(skills) {
|
||||
|
||||
for (const [bundleName, rule] of Object.entries(BUNDLE_RULES)) {
|
||||
const bundleSkills = [];
|
||||
const keywords = rule.keywords.map(keyword => keyword.toLowerCase());
|
||||
const keywords = rule.keywords.map((keyword) => keyword.toLowerCase());
|
||||
|
||||
for (const skill of skills) {
|
||||
const tokenSet = skillTokens.get(skill.id) || new Set();
|
||||
if (keywords.some(keyword => tokenSet.has(keyword))) {
|
||||
if (keywords.some((keyword) => tokenSet.has(keyword))) {
|
||||
bundleSkills.push(skill.id);
|
||||
}
|
||||
}
|
||||
@@ -256,49 +551,58 @@ function buildBundles(skills) {
|
||||
};
|
||||
}
|
||||
|
||||
const common = CURATED_COMMON.filter(skillId => skillTokens.has(skillId));
|
||||
const common = CURATED_COMMON.filter((skillId) => skillTokens.has(skillId));
|
||||
|
||||
return { bundles, common };
|
||||
}
|
||||
|
||||
function truncate(value, limit) {
|
||||
if (!value || value.length <= limit) return value || '';
|
||||
if (!value || value.length <= limit) return value || "";
|
||||
return `${value.slice(0, limit - 3)}...`;
|
||||
}
|
||||
|
||||
function renderCatalogMarkdown(catalog) {
|
||||
const lines = [];
|
||||
lines.push('# Skill Catalog');
|
||||
lines.push('');
|
||||
lines.push("# Skill Catalog");
|
||||
lines.push("");
|
||||
lines.push(`Generated at: ${catalog.generatedAt}`);
|
||||
lines.push('');
|
||||
lines.push("");
|
||||
lines.push(`Total skills: ${catalog.total}`);
|
||||
lines.push('');
|
||||
lines.push("");
|
||||
|
||||
const categories = Array.from(new Set(catalog.skills.map(skill => skill.category))).sort();
|
||||
const categories = Array.from(
|
||||
new Set(catalog.skills.map((skill) => skill.category)),
|
||||
).sort();
|
||||
for (const category of categories) {
|
||||
const grouped = catalog.skills.filter(skill => skill.category === category);
|
||||
const grouped = catalog.skills.filter(
|
||||
(skill) => skill.category === category,
|
||||
);
|
||||
lines.push(`## ${category} (${grouped.length})`);
|
||||
lines.push('');
|
||||
lines.push('| Skill | Description | Tags | Triggers |');
|
||||
lines.push('| --- | --- | --- | --- |');
|
||||
lines.push("");
|
||||
lines.push("| Skill | Description | Tags | Triggers |");
|
||||
lines.push("| --- | --- | --- | --- |");
|
||||
|
||||
for (const skill of grouped) {
|
||||
const description = truncate(skill.description, 160).replace(/\|/g, '\\|');
|
||||
const tags = skill.tags.join(', ');
|
||||
const triggers = skill.triggers.join(', ');
|
||||
lines.push(`| \`${skill.id}\` | ${description} | ${tags} | ${triggers} |`);
|
||||
const description = truncate(skill.description, 160).replace(
|
||||
/\|/g,
|
||||
"\\|",
|
||||
);
|
||||
const tags = skill.tags.join(", ");
|
||||
const triggers = skill.triggers.join(", ");
|
||||
lines.push(
|
||||
`| \`${skill.id}\` | ${description} | ${tags} | ${triggers} |`,
|
||||
);
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
return lines.join('\n');
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
function buildCatalog() {
|
||||
const skillRelPaths = listSkillIdsRecursive(SKILLS_DIR);
|
||||
const skills = skillRelPaths.map(relPath => readSkill(SKILLS_DIR, relPath));
|
||||
const skills = skillRelPaths.map((relPath) => readSkill(SKILLS_DIR, relPath));
|
||||
const catalogSkills = [];
|
||||
|
||||
for (const skill of skills) {
|
||||
@@ -318,26 +622,32 @@ function buildCatalog() {
|
||||
}
|
||||
|
||||
const catalog = {
|
||||
generatedAt: process.env.SOURCE_DATE_EPOCH
|
||||
? new Date(process.env.SOURCE_DATE_EPOCH * 1000).toISOString()
|
||||
: (process.env.CI ? '2026-02-08T00:00:00.000Z' : new Date().toISOString()),
|
||||
generatedAt: process.env.SOURCE_DATE_EPOCH
|
||||
? new Date(process.env.SOURCE_DATE_EPOCH * 1000).toISOString()
|
||||
: "2026-02-08T00:00:00.000Z",
|
||||
total: catalogSkills.length,
|
||||
skills: catalogSkills.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0)),
|
||||
skills: catalogSkills.sort((a, b) =>
|
||||
a.id < b.id ? -1 : a.id > b.id ? 1 : 0,
|
||||
),
|
||||
};
|
||||
|
||||
const aliases = buildAliases(catalog.skills);
|
||||
const bundleData = buildBundles(catalog.skills);
|
||||
|
||||
const catalogPath = path.join(ROOT, 'data', 'catalog.json');
|
||||
const catalogMarkdownPath = path.join(ROOT, 'CATALOG.md');
|
||||
const bundlesPath = path.join(ROOT, 'data', 'bundles.json');
|
||||
const aliasesPath = path.join(ROOT, 'data', 'aliases.json');
|
||||
const catalogPath = path.join(ROOT, "data", "catalog.json");
|
||||
const catalogMarkdownPath = path.join(ROOT, "CATALOG.md");
|
||||
const bundlesPath = path.join(ROOT, "data", "bundles.json");
|
||||
const aliasesPath = path.join(ROOT, "data", "aliases.json");
|
||||
|
||||
fs.writeFileSync(catalogPath, JSON.stringify(catalog, null, 2));
|
||||
fs.writeFileSync(catalogMarkdownPath, renderCatalogMarkdown(catalog));
|
||||
fs.writeFileSync(
|
||||
bundlesPath,
|
||||
JSON.stringify({ generatedAt: catalog.generatedAt, ...bundleData }, null, 2),
|
||||
JSON.stringify(
|
||||
{ generatedAt: catalog.generatedAt, ...bundleData },
|
||||
null,
|
||||
2,
|
||||
),
|
||||
);
|
||||
fs.writeFileSync(
|
||||
aliasesPath,
|
||||
|
||||
@@ -1,285 +1,293 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Sync Microsoft Skills Repository - v3
|
||||
Preserves original structure from skills/ directory and handles all locations
|
||||
Sync Microsoft Skills Repository - v4 (Flat Structure)
|
||||
Reads each SKILL.md frontmatter 'name' field and uses it as a flat directory
|
||||
name under skills/ to comply with the repository's indexing conventions.
|
||||
"""
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
MS_REPO = "https://github.com/microsoft/skills.git"
|
||||
TARGET_DIR = Path(__file__).parent.parent / "skills"
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
TARGET_DIR = REPO_ROOT / "skills"
|
||||
DOCS_DIR = REPO_ROOT / "docs"
|
||||
|
||||
|
||||
def clone_repo(temp_dir: Path):
|
||||
"""Clone Microsoft skills repository"""
|
||||
"""Clone Microsoft skills repository (shallow)."""
|
||||
print("🔄 Cloning Microsoft Skills repository...")
|
||||
subprocess.run(
|
||||
["git", "clone", "--depth", "1", MS_REPO, str(temp_dir)],
|
||||
check=True
|
||||
check=True,
|
||||
)
|
||||
|
||||
def find_all_skills(source_dir: Path):
|
||||
"""Find all SKILL.md files in the repository"""
|
||||
all_skills = {}
|
||||
|
||||
# Search in .github/skills/
|
||||
github_skills = source_dir / ".github" / "skills"
|
||||
if github_skills.exists():
|
||||
for skill_dir in github_skills.iterdir():
|
||||
if skill_dir.is_dir() and (skill_dir / "SKILL.md").exists():
|
||||
all_skills[skill_dir.name] = skill_dir
|
||||
|
||||
# Search in .github/plugins/
|
||||
github_plugins = source_dir / ".github" / "plugins"
|
||||
if github_plugins.exists():
|
||||
for skill_file in github_plugins.rglob("SKILL.md"):
|
||||
skill_dir = skill_file.parent
|
||||
skill_name = skill_dir.name
|
||||
if skill_name not in all_skills:
|
||||
all_skills[skill_name] = skill_dir
|
||||
|
||||
return all_skills
|
||||
|
||||
def sync_skills_preserve_structure(source_dir: Path, target_dir: Path):
|
||||
def extract_skill_name(skill_md_path: Path) -> str | None:
|
||||
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
|
||||
try:
|
||||
content = skill_md_path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
|
||||
if not fm_match:
|
||||
return None
|
||||
|
||||
for line in fm_match.group(1).splitlines():
|
||||
match = re.match(r"^name:\s*(.+)$", line)
|
||||
if match:
|
||||
value = match.group(1).strip().strip("\"'")
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def generate_fallback_name(relative_path: Path) -> str:
|
||||
"""
|
||||
Sync skills preserving the original skills/ directory structure.
|
||||
This is better than auto-categorization since MS already organized them.
|
||||
Generate a fallback directory name when frontmatter 'name' is missing.
|
||||
Converts a path like 'dotnet/compute/botservice' to 'ms-dotnet-compute-botservice'.
|
||||
"""
|
||||
parts = [p for p in relative_path.parts if p]
|
||||
return "ms-" + "-".join(parts)
|
||||
|
||||
|
||||
def find_skills_in_directory(source_dir: Path):
|
||||
"""
|
||||
Walk the Microsoft repo's skills/ directory (which uses symlinks)
|
||||
and resolve each to its actual SKILL.md content.
|
||||
Returns a list of dicts with keys: relative_path, skill_md, source_dir.
|
||||
"""
|
||||
skills_source = source_dir / "skills"
|
||||
|
||||
results = []
|
||||
|
||||
if not skills_source.exists():
|
||||
print(" ⚠️ skills/ directory not found, will use flat structure")
|
||||
return sync_skills_flat(source_dir, target_dir)
|
||||
|
||||
# First, find all actual skill content
|
||||
all_skills = find_all_skills(source_dir)
|
||||
print(f" 📂 Found {len(all_skills)} total skills in repository")
|
||||
|
||||
synced_count = 0
|
||||
skill_metadata = []
|
||||
|
||||
# Walk through the skills/ directory structure
|
||||
return results
|
||||
|
||||
for item in skills_source.rglob("*"):
|
||||
# Skip non-directories
|
||||
if not item.is_dir():
|
||||
continue
|
||||
|
||||
# Check if this directory (or its symlink target) contains a SKILL.md
|
||||
|
||||
skill_md = None
|
||||
skill_source_dir = None
|
||||
|
||||
# If it's a symlink, resolve it
|
||||
actual_dir = None
|
||||
|
||||
if item.is_symlink():
|
||||
try:
|
||||
resolved = item.resolve()
|
||||
if (resolved / "SKILL.md").exists():
|
||||
skill_md = resolved / "SKILL.md"
|
||||
skill_source_dir = resolved
|
||||
except:
|
||||
actual_dir = resolved
|
||||
except Exception:
|
||||
continue
|
||||
elif (item / "SKILL.md").exists():
|
||||
skill_md = item / "SKILL.md"
|
||||
skill_source_dir = item
|
||||
|
||||
actual_dir = item
|
||||
|
||||
if skill_md is None:
|
||||
continue
|
||||
|
||||
# Get relative path from skills/ directory - this preserves MS's organization
|
||||
|
||||
try:
|
||||
relative_path = item.relative_to(skills_source)
|
||||
except ValueError:
|
||||
# Shouldn't happen, but handle it
|
||||
continue
|
||||
|
||||
# Create target directory preserving structure
|
||||
target_skill_dir = target_dir / "official" / "microsoft" / relative_path
|
||||
target_skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Copy SKILL.md
|
||||
shutil.copy2(skill_md, target_skill_dir / "SKILL.md")
|
||||
|
||||
# Copy other files from the actual skill directory
|
||||
for file_item in skill_source_dir.iterdir():
|
||||
if file_item.name != "SKILL.md" and file_item.is_file():
|
||||
shutil.copy2(file_item, target_skill_dir / file_item.name)
|
||||
|
||||
# Collect metadata
|
||||
skill_metadata.append({
|
||||
"path": str(relative_path),
|
||||
"name": item.name,
|
||||
"category": str(relative_path.parent),
|
||||
"source": str(skill_source_dir.relative_to(source_dir))
|
||||
})
|
||||
|
||||
synced_count += 1
|
||||
print(f" ✅ Synced: {relative_path}")
|
||||
|
||||
# Also sync any skills from .github/plugins that aren't symlinked in skills/
|
||||
plugin_skills = find_plugin_skills(source_dir, skill_metadata)
|
||||
if plugin_skills:
|
||||
print(f"\n 📦 Found {len(plugin_skills)} additional plugin skills")
|
||||
for plugin_skill in plugin_skills:
|
||||
target_skill_dir = target_dir / "official" / "microsoft" / "plugins" / plugin_skill['name']
|
||||
target_skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Copy SKILL.md
|
||||
shutil.copy2(plugin_skill['source'] / "SKILL.md", target_skill_dir / "SKILL.md")
|
||||
|
||||
# Copy other files
|
||||
for file_item in plugin_skill['source'].iterdir():
|
||||
if file_item.name != "SKILL.md" and file_item.is_file():
|
||||
shutil.copy2(file_item, target_skill_dir / file_item.name)
|
||||
|
||||
skill_metadata.append({
|
||||
"path": f"plugins/{plugin_skill['name']}",
|
||||
"name": plugin_skill['name'],
|
||||
"category": "plugins",
|
||||
"source": str(plugin_skill['source'].relative_to(source_dir))
|
||||
})
|
||||
|
||||
synced_count += 1
|
||||
print(f" ✅ Synced: plugins/{plugin_skill['name']}")
|
||||
|
||||
return synced_count, skill_metadata
|
||||
|
||||
def find_plugin_skills(source_dir: Path, already_synced: list):
|
||||
"""Find plugin skills that haven't been synced yet"""
|
||||
synced_names = {s['name'] for s in already_synced}
|
||||
plugin_skills = []
|
||||
|
||||
results.append({
|
||||
"relative_path": relative_path,
|
||||
"skill_md": skill_md,
|
||||
"source_dir": actual_dir,
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def find_plugin_skills(source_dir: Path, already_synced_names: set):
|
||||
"""Find plugin skills in .github/plugins/ that haven't been synced yet."""
|
||||
results = []
|
||||
github_plugins = source_dir / ".github" / "plugins"
|
||||
if github_plugins.exists():
|
||||
for skill_file in github_plugins.rglob("SKILL.md"):
|
||||
skill_dir = skill_file.parent
|
||||
skill_name = skill_dir.name
|
||||
|
||||
if skill_name not in synced_names:
|
||||
plugin_skills.append({
|
||||
'name': skill_name,
|
||||
'source': skill_dir
|
||||
})
|
||||
|
||||
return plugin_skills
|
||||
|
||||
if not github_plugins.exists():
|
||||
return results
|
||||
|
||||
for skill_file in github_plugins.rglob("SKILL.md"):
|
||||
skill_dir = skill_file.parent
|
||||
skill_name = skill_dir.name
|
||||
|
||||
if skill_name not in already_synced_names:
|
||||
results.append({
|
||||
"relative_path": Path("plugins") / skill_name,
|
||||
"skill_md": skill_file,
|
||||
"source_dir": skill_dir,
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def sync_skills_flat(source_dir: Path, target_dir: Path):
|
||||
"""Fallback: sync all skills in a flat structure"""
|
||||
all_skills = find_all_skills(source_dir)
|
||||
|
||||
"""
|
||||
Sync all Microsoft skills into a flat structure under skills/.
|
||||
Uses frontmatter 'name' as directory name, with collision detection.
|
||||
"""
|
||||
all_skill_entries = find_skills_in_directory(source_dir)
|
||||
print(f" 📂 Found {len(all_skill_entries)} skills in skills/ directory")
|
||||
|
||||
synced_count = 0
|
||||
skill_metadata = []
|
||||
|
||||
for skill_name, skill_dir in all_skills.items():
|
||||
target_skill_dir = target_dir / "official" / "microsoft" / skill_name
|
||||
# name -> original relative_path (for collision logging)
|
||||
used_names: dict[str, str] = {}
|
||||
|
||||
for entry in all_skill_entries:
|
||||
skill_name = extract_skill_name(entry["skill_md"])
|
||||
|
||||
if not skill_name:
|
||||
skill_name = generate_fallback_name(entry["relative_path"])
|
||||
print(
|
||||
f" ⚠️ No frontmatter name for {entry['relative_path']}, using fallback: {skill_name}")
|
||||
|
||||
# Collision detection
|
||||
if skill_name in used_names:
|
||||
original = used_names[skill_name]
|
||||
print(
|
||||
f" ⚠️ Name collision '{skill_name}': {entry['relative_path']} vs {original}")
|
||||
# Append language prefix from path to disambiguate
|
||||
lang = entry["relative_path"].parts[0] if entry["relative_path"].parts else "unknown"
|
||||
skill_name = f"{skill_name}-{lang}"
|
||||
print(f" Resolved to: {skill_name}")
|
||||
|
||||
used_names[skill_name] = str(entry["relative_path"])
|
||||
|
||||
# Create flat target directory
|
||||
target_skill_dir = target_dir / skill_name
|
||||
target_skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
# Copy SKILL.md
|
||||
shutil.copy2(skill_dir / "SKILL.md", target_skill_dir / "SKILL.md")
|
||||
|
||||
# Copy other files
|
||||
for item in skill_dir.iterdir():
|
||||
if item.name != "SKILL.md" and item.is_file():
|
||||
shutil.copy2(item, target_skill_dir / item.name)
|
||||
|
||||
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
|
||||
|
||||
# Copy other files from the skill directory
|
||||
for file_item in entry["source_dir"].iterdir():
|
||||
if file_item.name != "SKILL.md" and file_item.is_file():
|
||||
shutil.copy2(file_item, target_skill_dir / file_item.name)
|
||||
|
||||
skill_metadata.append({
|
||||
"path": skill_name,
|
||||
"name": skill_name,
|
||||
"category": "root"
|
||||
"flat_name": skill_name,
|
||||
"original_path": str(entry["relative_path"]),
|
||||
"source": "microsoft/skills",
|
||||
})
|
||||
|
||||
|
||||
synced_count += 1
|
||||
print(f" ✅ Synced: {skill_name}")
|
||||
|
||||
print(f" ✅ {entry['relative_path']} → skills/{skill_name}/")
|
||||
|
||||
# Sync plugin skills
|
||||
synced_names = set(used_names.keys())
|
||||
plugin_entries = find_plugin_skills(
|
||||
source_dir, {e["source_dir"].name for e in all_skill_entries})
|
||||
|
||||
if plugin_entries:
|
||||
print(f"\n 📦 Found {len(plugin_entries)} additional plugin skills")
|
||||
for entry in plugin_entries:
|
||||
skill_name = extract_skill_name(entry["skill_md"])
|
||||
if not skill_name:
|
||||
skill_name = entry["source_dir"].name
|
||||
|
||||
if skill_name in synced_names:
|
||||
skill_name = f"{skill_name}-plugin"
|
||||
|
||||
synced_names.add(skill_name)
|
||||
|
||||
target_skill_dir = target_dir / skill_name
|
||||
target_skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
|
||||
|
||||
for file_item in entry["source_dir"].iterdir():
|
||||
if file_item.name != "SKILL.md" and file_item.is_file():
|
||||
shutil.copy2(file_item, target_skill_dir / file_item.name)
|
||||
|
||||
skill_metadata.append({
|
||||
"flat_name": skill_name,
|
||||
"original_path": str(entry["relative_path"]),
|
||||
"source": "microsoft/skills (plugin)",
|
||||
})
|
||||
|
||||
synced_count += 1
|
||||
print(f" ✅ {entry['relative_path']} → skills/{skill_name}/")
|
||||
|
||||
return synced_count, skill_metadata
|
||||
|
||||
def create_attribution_file(target_dir: Path, metadata: list):
|
||||
"""Create attribution and metadata file"""
|
||||
|
||||
def save_attribution(metadata: list):
|
||||
"""Save attribution metadata to docs/."""
|
||||
DOCS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
attribution = {
|
||||
"source": "microsoft/skills",
|
||||
"repository": "https://github.com/microsoft/skills",
|
||||
"license": "MIT",
|
||||
"synced_skills": len(metadata),
|
||||
"structure": "flat (frontmatter name as directory name)",
|
||||
"skills": metadata,
|
||||
"note": "Symlinks resolved and content copied for compatibility. Original directory structure preserved."
|
||||
}
|
||||
|
||||
ms_dir = target_dir / "official" / "microsoft"
|
||||
ms_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(ms_dir / "ATTRIBUTION.json", "w") as f:
|
||||
with open(DOCS_DIR / "microsoft-skills-attribution.json", "w") as f:
|
||||
json.dump(attribution, f, indent=2)
|
||||
|
||||
def copy_documentation(source_dir: Path, target_dir: Path):
|
||||
"""Copy LICENSE and README files"""
|
||||
ms_dir = target_dir / "official" / "microsoft"
|
||||
ms_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def copy_license(source_dir: Path):
|
||||
"""Copy the Microsoft LICENSE to docs/."""
|
||||
DOCS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
if (source_dir / "LICENSE").exists():
|
||||
shutil.copy2(source_dir / "LICENSE", ms_dir / "LICENSE")
|
||||
|
||||
if (source_dir / "README.md").exists():
|
||||
shutil.copy2(source_dir / "README.md", ms_dir / "README-MICROSOFT.md")
|
||||
shutil.copy2(source_dir / "LICENSE", DOCS_DIR / "LICENSE-MICROSOFT")
|
||||
|
||||
|
||||
def main():
|
||||
"""Main sync function"""
|
||||
print("🚀 Microsoft Skills Sync Script v3")
|
||||
print("=" * 50)
|
||||
|
||||
"""Main sync function."""
|
||||
print("🚀 Microsoft Skills Sync Script v4 (Flat Structure)")
|
||||
print("=" * 55)
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
temp_path = Path(temp_dir)
|
||||
|
||||
|
||||
try:
|
||||
# Clone repository
|
||||
clone_repo(temp_path)
|
||||
|
||||
# Create target directory
|
||||
|
||||
TARGET_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Sync skills (preserving structure)
|
||||
print("\n🔗 Resolving symlinks and preserving directory structure...")
|
||||
count, metadata = sync_skills_preserve_structure(temp_path, TARGET_DIR)
|
||||
|
||||
# Copy documentation
|
||||
print("\n📄 Copying documentation...")
|
||||
copy_documentation(temp_path, TARGET_DIR)
|
||||
|
||||
# Create attribution file
|
||||
print("📝 Creating attribution metadata...")
|
||||
create_attribution_file(TARGET_DIR, metadata)
|
||||
|
||||
print(f"\n✨ Success! Synced {count} Microsoft skills")
|
||||
print(f"📁 Location: {TARGET_DIR / 'official' / 'microsoft'}")
|
||||
|
||||
# Show structure summary
|
||||
ms_dir = TARGET_DIR / "official" / "microsoft"
|
||||
categories = set()
|
||||
|
||||
print("\n🔗 Resolving symlinks and flattening into skills/<name>/...")
|
||||
count, metadata = sync_skills_flat(temp_path, TARGET_DIR)
|
||||
|
||||
print("\n📄 Saving attribution...")
|
||||
save_attribution(metadata)
|
||||
copy_license(temp_path)
|
||||
|
||||
print(
|
||||
f"\n✨ Success! Synced {count} Microsoft skills (flat structure)")
|
||||
print(f"📁 Location: {TARGET_DIR}/")
|
||||
|
||||
# Show summary of languages
|
||||
languages = set()
|
||||
for skill in metadata:
|
||||
cat = skill.get('category', 'root')
|
||||
if cat != 'root':
|
||||
categories.add(cat.split('/')[0] if '/' in cat else cat)
|
||||
|
||||
parts = skill["original_path"].split("/")
|
||||
if len(parts) >= 1 and parts[0] != "plugins":
|
||||
languages.add(parts[0])
|
||||
|
||||
print(f"\n📊 Organization:")
|
||||
print(f" Total skills: {count}")
|
||||
print(f" Categories: {', '.join(sorted(categories)[:10])}")
|
||||
if len(categories) > 10:
|
||||
print(f" ... and {len(categories) - 10} more")
|
||||
|
||||
print(f" Languages: {', '.join(sorted(languages))}")
|
||||
|
||||
print("\n📋 Next steps:")
|
||||
print("1. Review synced skills")
|
||||
print("2. Run: npm run validate")
|
||||
print("3. Update CATALOG.md")
|
||||
print("4. Update docs/SOURCES.md")
|
||||
print("5. Commit changes and create PR")
|
||||
|
||||
print("1. Delete old skills/official/ directory (if it exists)")
|
||||
print("2. Run: npm run build")
|
||||
print("3. Commit changes and create PR")
|
||||
|
||||
except Exception as e:
|
||||
print(f"\n❌ Error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return 1
|
||||
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
exit(main())
|
||||
|
||||
@@ -1,149 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Debug script to inspect Microsoft Skills repository structure - v2
|
||||
Handles all skill locations including plugins
|
||||
Inspect Microsoft Skills Repository Structure
|
||||
Shows the repository layout, skill locations, and what flat names would be generated.
|
||||
"""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
MS_REPO = "https://github.com/microsoft/skills.git"
|
||||
|
||||
|
||||
def extract_skill_name(skill_md_path: Path) -> str | None:
|
||||
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
|
||||
try:
|
||||
content = skill_md_path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
|
||||
if not fm_match:
|
||||
return None
|
||||
|
||||
for line in fm_match.group(1).splitlines():
|
||||
match = re.match(r"^name:\s*(.+)$", line)
|
||||
if match:
|
||||
value = match.group(1).strip().strip("\"'")
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def inspect_repo():
|
||||
"""Inspect the Microsoft skills repository structure"""
|
||||
"""Inspect the Microsoft skills repository structure."""
|
||||
print("🔍 Inspecting Microsoft Skills Repository Structure")
|
||||
print("=" * 60)
|
||||
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
temp_path = Path(temp_dir)
|
||||
|
||||
|
||||
print("\n1️⃣ Cloning repository...")
|
||||
subprocess.run(
|
||||
["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
|
||||
check=True,
|
||||
capture_output=True
|
||||
capture_output=True,
|
||||
)
|
||||
|
||||
print("\n2️⃣ Repository structure:")
|
||||
print("\nTop-level directories:")
|
||||
for item in temp_path.iterdir():
|
||||
if item.is_dir():
|
||||
print(f" 📁 {item.name}/")
|
||||
|
||||
# Check .github/skills
|
||||
github_skills = temp_path / ".github" / "skills"
|
||||
if github_skills.exists():
|
||||
skill_dirs = [d for d in github_skills.iterdir() if d.is_dir()]
|
||||
print(f"\n3️⃣ Found {len(skill_dirs)} directories in .github/skills/:")
|
||||
for skill_dir in skill_dirs[:5]:
|
||||
has_skill_md = (skill_dir / "SKILL.md").exists()
|
||||
print(f" {'✅' if has_skill_md else '❌'} {skill_dir.name}")
|
||||
if len(skill_dirs) > 5:
|
||||
print(f" ... and {len(skill_dirs) - 5} more")
|
||||
|
||||
# Check .github/plugins
|
||||
github_plugins = temp_path / ".github" / "plugins"
|
||||
if github_plugins.exists():
|
||||
plugin_skills = list(github_plugins.rglob("SKILL.md"))
|
||||
print(f"\n🔌 Found {len(plugin_skills)} plugin skills in .github/plugins/:")
|
||||
for skill_file in plugin_skills[:5]:
|
||||
try:
|
||||
rel_path = skill_file.relative_to(github_plugins)
|
||||
print(f" ✅ {rel_path}")
|
||||
except ValueError:
|
||||
print(f" ✅ {skill_file.name}")
|
||||
if len(plugin_skills) > 5:
|
||||
print(f" ... and {len(plugin_skills) - 5} more")
|
||||
|
||||
# Check skills directory
|
||||
skills_dir = temp_path / "skills"
|
||||
if skills_dir.exists():
|
||||
print(f"\n4️⃣ Checking skills/ directory structure:")
|
||||
|
||||
# Count items
|
||||
all_items = list(skills_dir.rglob("*"))
|
||||
symlink_dirs = [s for s in all_items if s.is_symlink() and s.is_dir()]
|
||||
symlink_files = [s for s in all_items if s.is_symlink() and not s.is_dir()]
|
||||
regular_dirs = [s for s in all_items if s.is_dir() and not s.is_symlink()]
|
||||
|
||||
print(f" Total items: {len(all_items)}")
|
||||
print(f" Regular directories: {len(regular_dirs)}")
|
||||
print(f" Symlinked directories: {len(symlink_dirs)}")
|
||||
print(f" Symlinked files: {len(symlink_files)}")
|
||||
|
||||
# Show directory structure
|
||||
print(f"\n Top-level categories in skills/:")
|
||||
for item in skills_dir.iterdir():
|
||||
if item.is_dir():
|
||||
# Count subdirs
|
||||
subdirs = [d for d in item.iterdir() if d.is_dir()]
|
||||
print(f" 📁 {item.name}/ ({len(subdirs)} items)")
|
||||
|
||||
if symlink_dirs:
|
||||
print(f"\n Sample symlinked directories:")
|
||||
for symlink in symlink_dirs[:5]:
|
||||
try:
|
||||
target = symlink.resolve()
|
||||
relative = symlink.relative_to(skills_dir)
|
||||
target_name = target.name if target.exists() else "broken"
|
||||
print(f" {relative} → {target_name}")
|
||||
except:
|
||||
pass
|
||||
|
||||
# Check for all SKILL.md files
|
||||
print(f"\n5️⃣ Comprehensive SKILL.md search:")
|
||||
|
||||
# Find all SKILL.md files
|
||||
all_skill_mds = list(temp_path.rglob("SKILL.md"))
|
||||
print(f" Total SKILL.md files found: {len(all_skill_mds)}")
|
||||
|
||||
# Categorize by location
|
||||
locations = {}
|
||||
for skill_md in all_skill_mds:
|
||||
print(f"\n2️⃣ Total SKILL.md files found: {len(all_skill_mds)}")
|
||||
|
||||
# Show flat name mapping
|
||||
print(f"\n3️⃣ Flat Name Mapping (frontmatter 'name' → directory name):")
|
||||
print("-" * 60)
|
||||
|
||||
names_seen: dict[str, list[str]] = {}
|
||||
|
||||
for skill_md in sorted(all_skill_mds, key=lambda p: str(p)):
|
||||
try:
|
||||
if ".github/skills" in str(skill_md):
|
||||
loc = ".github/skills"
|
||||
elif ".github/plugins" in str(skill_md):
|
||||
loc = ".github/plugins"
|
||||
elif "/skills/" in str(skill_md):
|
||||
loc = "skills/ (structure)"
|
||||
else:
|
||||
loc = "other"
|
||||
|
||||
locations[loc] = locations.get(loc, 0) + 1
|
||||
except:
|
||||
pass
|
||||
|
||||
print(f"\n Distribution by location:")
|
||||
for loc, count in sorted(locations.items()):
|
||||
print(f" {loc}: {count}")
|
||||
|
||||
# Show sample skills from each major category
|
||||
print(f"\n6️⃣ Sample skills by category:")
|
||||
|
||||
if skills_dir.exists():
|
||||
for category in list(skills_dir.iterdir())[:3]:
|
||||
if category.is_dir():
|
||||
skills_in_cat = [s for s in category.rglob("*") if s.is_dir() and (s.is_symlink() or (s / "SKILL.md").exists())]
|
||||
print(f"\n {category.name}/ ({len(skills_in_cat)} skills):")
|
||||
for skill in skills_in_cat[:3]:
|
||||
try:
|
||||
rel = skill.relative_to(skills_dir)
|
||||
print(f" - {rel}")
|
||||
except:
|
||||
pass
|
||||
|
||||
print("\n7️⃣ Recommendations:")
|
||||
print(" ✅ Preserve skills/ directory structure (Microsoft's organization)")
|
||||
print(" ✅ Resolve symlinks to actual content in .github/skills/")
|
||||
print(" ✅ Include plugin skills from .github/plugins/")
|
||||
print(" ✅ This gives you the cleanest, most maintainable structure")
|
||||
|
||||
rel = skill_md.parent.relative_to(temp_path)
|
||||
except ValueError:
|
||||
rel = skill_md.parent
|
||||
|
||||
name = extract_skill_name(skill_md)
|
||||
display_name = name if name else f"(no name → ms-{'-'.join(rel.parts[1:])})"
|
||||
|
||||
print(f" {rel} → {display_name}")
|
||||
|
||||
effective_name = name if name else f"ms-{'-'.join(rel.parts[1:])}"
|
||||
if effective_name not in names_seen:
|
||||
names_seen[effective_name] = []
|
||||
names_seen[effective_name].append(str(rel))
|
||||
|
||||
# Collision check
|
||||
collisions = {n: paths for n, paths in names_seen.items()
|
||||
if len(paths) > 1}
|
||||
if collisions:
|
||||
print(f"\n4️⃣ ⚠️ Name Collisions Detected ({len(collisions)}):")
|
||||
for name, paths in collisions.items():
|
||||
print(f" '{name}':")
|
||||
for p in paths:
|
||||
print(f" - {p}")
|
||||
else:
|
||||
print(
|
||||
f"\n4️⃣ ✅ No name collisions — all {len(names_seen)} names are unique!")
|
||||
|
||||
print("\n✨ Inspection complete!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
inspect_repo()
|
||||
except Exception as e:
|
||||
print(f"\n❌ Error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
traceback.print_exc()
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test Script: Verify Microsoft Skills Sync Coverage
|
||||
Tests all possible skill locations and structures
|
||||
Test Script: Verify Microsoft Skills Sync Coverage and Flat Name Uniqueness
|
||||
Ensures all skills are captured and no directory name collisions exist.
|
||||
"""
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
@@ -11,204 +12,177 @@ from collections import defaultdict
|
||||
|
||||
MS_REPO = "https://github.com/microsoft/skills.git"
|
||||
|
||||
|
||||
def extract_skill_name(skill_md_path: Path) -> str | None:
|
||||
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
|
||||
try:
|
||||
content = skill_md_path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
|
||||
if not fm_match:
|
||||
return None
|
||||
|
||||
for line in fm_match.group(1).splitlines():
|
||||
match = re.match(r"^name:\s*(.+)$", line)
|
||||
if match:
|
||||
value = match.group(1).strip().strip("\"'")
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def analyze_skill_locations():
|
||||
"""
|
||||
Comprehensive analysis of all skill locations in Microsoft repo.
|
||||
Verifies that v3 script will catch everything.
|
||||
Verifies flat name uniqueness and coverage.
|
||||
"""
|
||||
print("🔬 Comprehensive Skill Location Analysis")
|
||||
print("🔬 Comprehensive Skill Coverage & Uniqueness Analysis")
|
||||
print("=" * 60)
|
||||
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
temp_path = Path(temp_dir)
|
||||
|
||||
|
||||
print("\n1️⃣ Cloning repository...")
|
||||
subprocess.run(
|
||||
["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
|
||||
check=True,
|
||||
capture_output=True
|
||||
capture_output=True,
|
||||
)
|
||||
|
||||
# Find ALL SKILL.md files in the entire repo
|
||||
|
||||
# Find ALL SKILL.md files
|
||||
all_skill_files = list(temp_path.rglob("SKILL.md"))
|
||||
print(f"\n2️⃣ Total SKILL.md files found: {len(all_skill_files)}")
|
||||
|
||||
# Categorize by location type
|
||||
|
||||
# Categorize by location
|
||||
location_types = defaultdict(list)
|
||||
|
||||
for skill_file in all_skill_files:
|
||||
skill_dir = skill_file.parent
|
||||
|
||||
# Determine location type
|
||||
if ".github/skills" in str(skill_file):
|
||||
path_str = str(skill_file)
|
||||
if ".github/skills" in path_str:
|
||||
location_types["github_skills"].append(skill_file)
|
||||
elif ".github/plugins" in str(skill_file):
|
||||
elif ".github/plugins" in path_str:
|
||||
location_types["github_plugins"].append(skill_file)
|
||||
elif "/skills/" in str(skill_file):
|
||||
# This is in the skills/ directory structure
|
||||
# Check if it's via symlink or actual file
|
||||
try:
|
||||
skills_root = temp_path / "skills"
|
||||
if skills_root in skill_file.parents:
|
||||
# This skill is somewhere under skills/
|
||||
# But is it a symlink or actual?
|
||||
if skill_dir.is_symlink():
|
||||
location_types["skills_symlinked"].append(skill_file)
|
||||
else:
|
||||
# Check if any parent is a symlink
|
||||
has_symlink_parent = False
|
||||
for parent in skill_file.parents:
|
||||
if parent == skills_root:
|
||||
break
|
||||
if parent.is_symlink():
|
||||
has_symlink_parent = True
|
||||
break
|
||||
|
||||
if has_symlink_parent:
|
||||
location_types["skills_via_symlink_parent"].append(skill_file)
|
||||
else:
|
||||
location_types["skills_direct"].append(skill_file)
|
||||
except:
|
||||
location_types["unknown"].append(skill_file)
|
||||
elif "/skills/" in path_str:
|
||||
location_types["skills_dir"].append(skill_file)
|
||||
else:
|
||||
location_types["other"].append(skill_file)
|
||||
|
||||
# Display results
|
||||
|
||||
print("\n3️⃣ Skills by Location Type:")
|
||||
print("-" * 60)
|
||||
|
||||
for loc_type, files in sorted(location_types.items()):
|
||||
print(f"\n 📍 {loc_type}: {len(files)} skills")
|
||||
if len(files) <= 5:
|
||||
for f in files:
|
||||
try:
|
||||
rel = f.relative_to(temp_path)
|
||||
print(f" - {rel}")
|
||||
except:
|
||||
print(f" - {f.name}")
|
||||
else:
|
||||
for f in files[:3]:
|
||||
try:
|
||||
rel = f.relative_to(temp_path)
|
||||
print(f" - {rel}")
|
||||
except:
|
||||
print(f" - {f.name}")
|
||||
print(f" ... and {len(files) - 3} more")
|
||||
|
||||
# Verify v3 coverage
|
||||
print("\n4️⃣ V3 Script Coverage Analysis:")
|
||||
print(f" 📍 {loc_type}: {len(files)} skills")
|
||||
|
||||
# Flat name uniqueness check
|
||||
print("\n4️⃣ Flat Name Uniqueness Check:")
|
||||
print("-" * 60)
|
||||
|
||||
github_skills_count = len(location_types["github_skills"])
|
||||
github_plugins_count = len(location_types["github_plugins"])
|
||||
skills_symlinked_count = len(location_types["skills_symlinked"])
|
||||
skills_direct_count = len(location_types["skills_direct"])
|
||||
skills_via_symlink_parent_count = len(location_types["skills_via_symlink_parent"])
|
||||
|
||||
print(f"\n ✅ .github/skills/: {github_skills_count}")
|
||||
print(f" └─ Handled by: find_all_skills() function")
|
||||
|
||||
print(f"\n ✅ .github/plugins/: {github_plugins_count}")
|
||||
print(f" └─ Handled by: find_plugin_skills() function")
|
||||
|
||||
print(f"\n ✅ skills/ (symlinked dirs): {skills_symlinked_count}")
|
||||
print(f" └─ Handled by: sync_skills_preserve_structure() lines 76-83")
|
||||
|
||||
if skills_direct_count > 0:
|
||||
print(f"\n ✅ skills/ (direct, non-symlink): {skills_direct_count}")
|
||||
print(f" └─ Handled by: sync_skills_preserve_structure() lines 84-86")
|
||||
|
||||
name_map: dict[str, list[str]] = {}
|
||||
missing_names = []
|
||||
|
||||
for skill_file in all_skill_files:
|
||||
try:
|
||||
rel = skill_file.parent.relative_to(temp_path)
|
||||
except ValueError:
|
||||
rel = skill_file.parent
|
||||
|
||||
name = extract_skill_name(skill_file)
|
||||
if not name:
|
||||
missing_names.append(str(rel))
|
||||
# Generate fallback
|
||||
parts = [p for p in rel.parts if p not in (
|
||||
".github", "skills", "plugins")]
|
||||
name = "ms-" + "-".join(parts) if parts else str(rel)
|
||||
|
||||
if name not in name_map:
|
||||
name_map[name] = []
|
||||
name_map[name].append(str(rel))
|
||||
|
||||
# Report results
|
||||
collisions = {n: paths for n, paths in name_map.items()
|
||||
if len(paths) > 1}
|
||||
unique_names = {n: paths for n,
|
||||
paths in name_map.items() if len(paths) == 1}
|
||||
|
||||
print(f"\n ✅ Unique names: {len(unique_names)}")
|
||||
|
||||
if missing_names:
|
||||
print(
|
||||
f"\n ⚠️ Skills missing frontmatter 'name' ({len(missing_names)}):")
|
||||
for path in missing_names[:5]:
|
||||
print(f" - {path}")
|
||||
if len(missing_names) > 5:
|
||||
print(f" ... and {len(missing_names) - 5} more")
|
||||
|
||||
if collisions:
|
||||
print(f"\n ❌ Name collisions ({len(collisions)}):")
|
||||
for name, paths in collisions.items():
|
||||
print(f" '{name}':")
|
||||
for p in paths:
|
||||
print(f" - {p}")
|
||||
else:
|
||||
print(f"\n ℹ️ skills/ (direct, non-symlink): 0")
|
||||
print(f" └─ No direct skills found, but v3 would handle them (lines 84-86)")
|
||||
|
||||
if skills_via_symlink_parent_count > 0:
|
||||
print(f"\n ⚠️ skills/ (via symlink parent): {skills_via_symlink_parent_count}")
|
||||
print(f" └─ May need special handling")
|
||||
|
||||
print(f"\n ✅ No collisions detected!")
|
||||
|
||||
# Validate all names are valid directory names
|
||||
print("\n5️⃣ Directory Name Validation:")
|
||||
invalid_names = []
|
||||
for name in name_map:
|
||||
if not re.match(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$", name):
|
||||
invalid_names.append(name)
|
||||
|
||||
if invalid_names:
|
||||
print(f" ❌ Invalid directory names ({len(invalid_names)}):")
|
||||
for name in invalid_names[:5]:
|
||||
print(f" - '{name}'")
|
||||
else:
|
||||
print(f" ✅ All {len(name_map)} names are valid directory names!")
|
||||
|
||||
# Summary
|
||||
print("\n5️⃣ Summary:")
|
||||
print("\n6️⃣ Summary:")
|
||||
print("-" * 60)
|
||||
|
||||
total_handled = (github_skills_count + github_plugins_count +
|
||||
skills_symlinked_count + skills_direct_count)
|
||||
|
||||
print(f"\n Total SKILL.md files: {len(all_skill_files)}")
|
||||
print(f" Handled by v3 script: {total_handled}")
|
||||
|
||||
if total_handled == len(all_skill_files):
|
||||
print(f"\n ✅ 100% Coverage - All skills will be synced!")
|
||||
elif total_handled >= len(all_skill_files) * 0.99:
|
||||
print(f"\n ✅ ~100% Coverage - Script handles all skills!")
|
||||
print(f" ({len(all_skill_files) - total_handled} skills may be duplicates)")
|
||||
total = len(all_skill_files)
|
||||
unique = len(unique_names) + len(collisions)
|
||||
|
||||
print(f" Total SKILL.md files: {total}")
|
||||
print(f" Unique flat names: {len(unique_names)}")
|
||||
print(f" Collisions: {len(collisions)}")
|
||||
print(f" Missing names: {len(missing_names)}")
|
||||
|
||||
is_pass = len(collisions) == 0 and len(invalid_names) == 0
|
||||
if is_pass:
|
||||
print(f"\n ✅ ALL CHECKS PASSED")
|
||||
else:
|
||||
print(f"\n ⚠️ Partial Coverage - Missing {len(all_skill_files) - total_handled} skills")
|
||||
print(f"\n Skills not covered:")
|
||||
for loc_type, files in location_types.items():
|
||||
if loc_type not in ["github_skills", "github_plugins", "skills_symlinked", "skills_direct"]:
|
||||
print(f" - {loc_type}: {len(files)}")
|
||||
|
||||
# Test specific cases
|
||||
print("\n6️⃣ Testing Specific Edge Cases:")
|
||||
print("-" * 60)
|
||||
|
||||
skills_dir = temp_path / "skills"
|
||||
if skills_dir.exists():
|
||||
# Check for any non-symlink directories with SKILL.md
|
||||
print("\n Checking for non-symlinked skills in skills/...")
|
||||
non_symlink_skills = []
|
||||
|
||||
for item in skills_dir.rglob("*"):
|
||||
if item.is_dir() and not item.is_symlink():
|
||||
if (item / "SKILL.md").exists():
|
||||
# Check if any parent is a symlink
|
||||
has_symlink_parent = False
|
||||
for parent in item.parents:
|
||||
if parent == skills_dir:
|
||||
break
|
||||
if parent.is_symlink():
|
||||
has_symlink_parent = True
|
||||
break
|
||||
|
||||
if not has_symlink_parent:
|
||||
non_symlink_skills.append(item)
|
||||
|
||||
if non_symlink_skills:
|
||||
print(f" ✅ Found {len(non_symlink_skills)} non-symlinked skills:")
|
||||
for skill in non_symlink_skills[:5]:
|
||||
print(f" - {skill.relative_to(skills_dir)}")
|
||||
print(f" These WILL be synced by v3 (lines 84-86)")
|
||||
else:
|
||||
print(f" ℹ️ No non-symlinked skills found in skills/")
|
||||
print(f" But v3 is ready to handle them if they exist!")
|
||||
|
||||
print(f"\n ⚠️ SOME CHECKS NEED ATTENTION")
|
||||
|
||||
print("\n✨ Analysis complete!")
|
||||
|
||||
|
||||
return {
|
||||
'total': len(all_skill_files),
|
||||
'handled': total_handled,
|
||||
'breakdown': {k: len(v) for k, v in location_types.items()}
|
||||
"total": total,
|
||||
"unique": len(unique_names),
|
||||
"collisions": len(collisions),
|
||||
"missing_names": len(missing_names),
|
||||
"invalid_names": len(invalid_names),
|
||||
"passed": is_pass,
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
results = analyze_skill_locations()
|
||||
|
||||
|
||||
print("\n" + "=" * 60)
|
||||
print("FINAL VERDICT")
|
||||
print("=" * 60)
|
||||
|
||||
coverage_pct = (results['handled'] / results['total'] * 100) if results['total'] > 0 else 0
|
||||
|
||||
print(f"\nCoverage: {coverage_pct:.1f}%")
|
||||
print(f"Skills handled: {results['handled']}/{results['total']}")
|
||||
|
||||
if coverage_pct >= 99:
|
||||
print("\n✅ V3 SCRIPT IS COMPREHENSIVE")
|
||||
print(" All skill locations are properly handled!")
|
||||
|
||||
if results["passed"]:
|
||||
print("\n✅ V4 FLAT STRUCTURE IS VALID")
|
||||
print(" All names are unique and valid directory names!")
|
||||
else:
|
||||
print("\n⚠️ V3 SCRIPT MAY NEED ENHANCEMENT")
|
||||
print(" Some edge cases might be missed")
|
||||
|
||||
print("\n⚠️ V4 FLAT STRUCTURE NEEDS FIXES")
|
||||
if results["collisions"] > 0:
|
||||
print(f" {results['collisions']} name collisions to resolve")
|
||||
if results["invalid_names"] > 0:
|
||||
print(f" {results['invalid_names']} invalid directory names")
|
||||
|
||||
except Exception as e:
|
||||
print(f"\n❌ Error: {e}")
|
||||
import traceback
|
||||
|
||||
Reference in New Issue
Block a user