refactor: flatten Microsoft skills from nested to flat directory structure

Rewrote sync_microsoft_skills.py (v4) to use each SKILL.md's frontmatter
'name' field as the flat directory name under skills/, replacing the nested
skills/official/microsoft/<lang>/<category>/<service>/ hierarchy.

This fixes CI failures caused by the indexing, validation, and catalog
scripts expecting skills/<id>/SKILL.md (depth 1).

Changes:
- Rewrite scripts/sync_microsoft_skills.py for flat output with collision detection
- Update scripts/tests/inspect_microsoft_repo.py for flat name mapping
- Update scripts/tests/test_comprehensive_coverage.py for name uniqueness checks
- Delete skills/official/ nested directory
- Add 129 Microsoft skills as flat directories (e.g. skills/azure-mgmt-botservice-dotnet/)
- Move attribution files to docs/ (LICENSE-MICROSOFT, microsoft-skills-attribution.json)
- Rebuild skills_index.json, CATALOG.md, README.md (845 total skills)
This commit is contained in:
Ahmed Rehan
2026-02-12 00:07:15 +05:00
parent e06454dafd
commit e7ae616385
142 changed files with 5683 additions and 6097 deletions

View File

@@ -1,161 +1,454 @@
const fs = require('fs');
const path = require('path');
const fs = require("fs");
const path = require("path");
const {
listSkillIdsRecursive,
readSkill,
tokenize,
unique,
} = require('../lib/skill-utils');
} = require("../lib/skill-utils");
const ROOT = path.resolve(__dirname, '..');
const SKILLS_DIR = path.join(ROOT, 'skills');
const ROOT = path.resolve(__dirname, "..");
const SKILLS_DIR = path.join(ROOT, "skills");
const STOPWORDS = new Set([
'a', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'from', 'has', 'have', 'in', 'into',
'is', 'it', 'its', 'of', 'on', 'or', 'our', 'out', 'over', 'that', 'the', 'their', 'they', 'this',
'to', 'use', 'when', 'with', 'you', 'your', 'will', 'can', 'if', 'not', 'only', 'also', 'more',
'best', 'practice', 'practices', 'expert', 'specialist', 'focused', 'focus', 'master', 'modern',
'advanced', 'comprehensive', 'production', 'production-ready', 'ready', 'build', 'create', 'deliver',
'design', 'implement', 'implementation', 'strategy', 'strategies', 'patterns', 'pattern', 'workflow',
'workflows', 'guide', 'template', 'templates', 'tool', 'tools', 'project', 'projects', 'support',
'manage', 'management', 'system', 'systems', 'services', 'service', 'across', 'end', 'end-to-end',
'using', 'based', 'ensure', 'ensure', 'help', 'needs', 'need', 'focuses', 'handles', 'builds', 'make',
"a",
"an",
"and",
"are",
"as",
"at",
"be",
"but",
"by",
"for",
"from",
"has",
"have",
"in",
"into",
"is",
"it",
"its",
"of",
"on",
"or",
"our",
"out",
"over",
"that",
"the",
"their",
"they",
"this",
"to",
"use",
"when",
"with",
"you",
"your",
"will",
"can",
"if",
"not",
"only",
"also",
"more",
"best",
"practice",
"practices",
"expert",
"specialist",
"focused",
"focus",
"master",
"modern",
"advanced",
"comprehensive",
"production",
"production-ready",
"ready",
"build",
"create",
"deliver",
"design",
"implement",
"implementation",
"strategy",
"strategies",
"patterns",
"pattern",
"workflow",
"workflows",
"guide",
"template",
"templates",
"tool",
"tools",
"project",
"projects",
"support",
"manage",
"management",
"system",
"systems",
"services",
"service",
"across",
"end",
"end-to-end",
"using",
"based",
"ensure",
"ensure",
"help",
"needs",
"need",
"focuses",
"handles",
"builds",
"make",
]);
const TAG_STOPWORDS = new Set([
'pro', 'expert', 'patterns', 'pattern', 'workflow', 'workflows', 'templates', 'template', 'toolkit',
'tools', 'tool', 'project', 'projects', 'guide', 'management', 'engineer', 'architect', 'developer',
'specialist', 'assistant', 'analysis', 'review', 'reviewer', 'automation', 'orchestration', 'scaffold',
'scaffolding', 'implementation', 'strategy', 'context', 'management', 'feature', 'features', 'smart',
'system', 'systems', 'design', 'development', 'development', 'test', 'testing', 'workflow',
"pro",
"expert",
"patterns",
"pattern",
"workflow",
"workflows",
"templates",
"template",
"toolkit",
"tools",
"tool",
"project",
"projects",
"guide",
"management",
"engineer",
"architect",
"developer",
"specialist",
"assistant",
"analysis",
"review",
"reviewer",
"automation",
"orchestration",
"scaffold",
"scaffolding",
"implementation",
"strategy",
"context",
"management",
"feature",
"features",
"smart",
"system",
"systems",
"design",
"development",
"development",
"test",
"testing",
"workflow",
]);
const CATEGORY_RULES = [
{
name: 'security',
name: "security",
keywords: [
'security', 'sast', 'compliance', 'privacy', 'threat', 'vulnerability', 'owasp', 'pci', 'gdpr',
'secrets', 'risk', 'malware', 'forensics', 'attack', 'incident', 'auth', 'mtls', 'zero', 'trust',
"security",
"sast",
"compliance",
"privacy",
"threat",
"vulnerability",
"owasp",
"pci",
"gdpr",
"secrets",
"risk",
"malware",
"forensics",
"attack",
"incident",
"auth",
"mtls",
"zero",
"trust",
],
},
{
name: 'infrastructure',
name: "infrastructure",
keywords: [
'kubernetes', 'k8s', 'helm', 'terraform', 'cloud', 'network', 'devops', 'gitops', 'prometheus',
'grafana', 'observability', 'monitoring', 'logging', 'tracing', 'deployment', 'istio', 'linkerd',
'service', 'mesh', 'slo', 'sre', 'oncall', 'incident', 'pipeline', 'cicd', 'ci', 'cd', 'kafka',
"kubernetes",
"k8s",
"helm",
"terraform",
"cloud",
"network",
"devops",
"gitops",
"prometheus",
"grafana",
"observability",
"monitoring",
"logging",
"tracing",
"deployment",
"istio",
"linkerd",
"service",
"mesh",
"slo",
"sre",
"oncall",
"incident",
"pipeline",
"cicd",
"ci",
"cd",
"kafka",
],
},
{
name: 'data-ai',
name: "data-ai",
keywords: [
'data', 'database', 'db', 'sql', 'postgres', 'mysql', 'analytics', 'etl', 'warehouse', 'dbt',
'ml', 'ai', 'llm', 'rag', 'vector', 'embedding', 'spark', 'airflow', 'cdc', 'pipeline',
"data",
"database",
"db",
"sql",
"postgres",
"mysql",
"analytics",
"etl",
"warehouse",
"dbt",
"ml",
"ai",
"llm",
"rag",
"vector",
"embedding",
"spark",
"airflow",
"cdc",
"pipeline",
],
},
{
name: 'development',
name: "development",
keywords: [
'python', 'javascript', 'typescript', 'java', 'golang', 'go', 'rust', 'csharp', 'dotnet', 'php',
'ruby', 'node', 'react', 'frontend', 'backend', 'mobile', 'ios', 'android', 'flutter', 'fastapi',
'django', 'nextjs', 'vue', 'api',
"python",
"javascript",
"typescript",
"java",
"golang",
"go",
"rust",
"csharp",
"dotnet",
"php",
"ruby",
"node",
"react",
"frontend",
"backend",
"mobile",
"ios",
"android",
"flutter",
"fastapi",
"django",
"nextjs",
"vue",
"api",
],
},
{
name: 'architecture',
name: "architecture",
keywords: [
'architecture', 'c4', 'microservices', 'event', 'cqrs', 'saga', 'domain', 'ddd', 'patterns',
'decision', 'adr',
"architecture",
"c4",
"microservices",
"event",
"cqrs",
"saga",
"domain",
"ddd",
"patterns",
"decision",
"adr",
],
},
{
name: 'testing',
keywords: ['testing', 'tdd', 'unit', 'e2e', 'qa', 'test'],
name: "testing",
keywords: ["testing", "tdd", "unit", "e2e", "qa", "test"],
},
{
name: 'business',
name: "business",
keywords: [
'business', 'market', 'sales', 'finance', 'startup', 'legal', 'hr', 'product', 'customer', 'seo',
'marketing', 'kpi', 'contract', 'employment',
"business",
"market",
"sales",
"finance",
"startup",
"legal",
"hr",
"product",
"customer",
"seo",
"marketing",
"kpi",
"contract",
"employment",
],
},
{
name: 'workflow',
keywords: ['workflow', 'orchestration', 'conductor', 'automation', 'process', 'collaboration'],
name: "workflow",
keywords: [
"workflow",
"orchestration",
"conductor",
"automation",
"process",
"collaboration",
],
},
];
const BUNDLE_RULES = {
'core-dev': {
description: 'Core development skills across languages, frameworks, and backend/frontend fundamentals.',
"core-dev": {
description:
"Core development skills across languages, frameworks, and backend/frontend fundamentals.",
keywords: [
'python', 'javascript', 'typescript', 'go', 'golang', 'rust', 'java', 'node', 'frontend', 'backend',
'react', 'fastapi', 'django', 'nextjs', 'api', 'mobile', 'ios', 'android', 'flutter', 'php', 'ruby',
"python",
"javascript",
"typescript",
"go",
"golang",
"rust",
"java",
"node",
"frontend",
"backend",
"react",
"fastapi",
"django",
"nextjs",
"api",
"mobile",
"ios",
"android",
"flutter",
"php",
"ruby",
],
},
'security-core': {
description: 'Security, privacy, and compliance essentials.',
"security-core": {
description: "Security, privacy, and compliance essentials.",
keywords: [
'security', 'sast', 'compliance', 'threat', 'risk', 'privacy', 'secrets', 'owasp', 'gdpr', 'pci',
'vulnerability', 'auth',
"security",
"sast",
"compliance",
"threat",
"risk",
"privacy",
"secrets",
"owasp",
"gdpr",
"pci",
"vulnerability",
"auth",
],
},
'k8s-core': {
description: 'Kubernetes and service mesh essentials.',
keywords: ['kubernetes', 'k8s', 'helm', 'istio', 'linkerd', 'service', 'mesh'],
},
'data-core': {
description: 'Data engineering and analytics foundations.',
"k8s-core": {
description: "Kubernetes and service mesh essentials.",
keywords: [
'data', 'database', 'sql', 'dbt', 'airflow', 'spark', 'analytics', 'etl', 'warehouse', 'postgres',
'mysql', 'kafka',
"kubernetes",
"k8s",
"helm",
"istio",
"linkerd",
"service",
"mesh",
],
},
'ops-core': {
description: 'Operations, observability, and delivery pipelines.',
"data-core": {
description: "Data engineering and analytics foundations.",
keywords: [
'observability', 'monitoring', 'logging', 'tracing', 'prometheus', 'grafana', 'devops', 'gitops',
'deployment', 'cicd', 'pipeline', 'slo', 'sre', 'incident',
"data",
"database",
"sql",
"dbt",
"airflow",
"spark",
"analytics",
"etl",
"warehouse",
"postgres",
"mysql",
"kafka",
],
},
"ops-core": {
description: "Operations, observability, and delivery pipelines.",
keywords: [
"observability",
"monitoring",
"logging",
"tracing",
"prometheus",
"grafana",
"devops",
"gitops",
"deployment",
"cicd",
"pipeline",
"slo",
"sre",
"incident",
],
},
};
const CURATED_COMMON = [
'bash-pro',
'python-pro',
'javascript-pro',
'typescript-pro',
'golang-pro',
'rust-pro',
'java-pro',
'frontend-developer',
'backend-architect',
'nodejs-backend-patterns',
'fastapi-pro',
'api-design-principles',
'sql-pro',
'database-architect',
'kubernetes-architect',
'terraform-specialist',
'observability-engineer',
'security-auditor',
'sast-configuration',
'gitops-workflow',
"bash-pro",
"python-pro",
"javascript-pro",
"typescript-pro",
"golang-pro",
"rust-pro",
"java-pro",
"frontend-developer",
"backend-architect",
"nodejs-backend-patterns",
"fastapi-pro",
"api-design-principles",
"sql-pro",
"database-architect",
"kubernetes-architect",
"terraform-specialist",
"observability-engineer",
"security-auditor",
"sast-configuration",
"gitops-workflow",
];
function normalizeTokens(tokens) {
return unique(tokens.map(token => token.toLowerCase())).filter(Boolean);
return unique(tokens.map((token) => token.toLowerCase())).filter(Boolean);
}
function deriveTags(skill) {
let tags = Array.isArray(skill.tags) ? skill.tags : [];
tags = tags.map(tag => tag.toLowerCase()).filter(Boolean);
tags = tags.map((tag) => tag.toLowerCase()).filter(Boolean);
if (!tags.length) {
tags = skill.id
.split('-')
.map(tag => tag.toLowerCase())
.filter(tag => tag && !TAG_STOPWORDS.has(tag));
.split("-")
.map((tag) => tag.toLowerCase())
.filter((tag) => tag && !TAG_STOPWORDS.has(tag));
}
return normalizeTokens(tags);
@@ -177,17 +470,18 @@ function detectCategory(skill, tags) {
}
}
return 'general';
return "general";
}
function buildTriggers(skill, tags) {
const tokens = tokenize(`${skill.name} ${skill.description}`)
.filter(token => token.length >= 2 && !STOPWORDS.has(token));
const tokens = tokenize(`${skill.name} ${skill.description}`).filter(
(token) => token.length >= 2 && !STOPWORDS.has(token),
);
return unique([...tags, ...tokens]).slice(0, 12);
}
function buildAliases(skills) {
const existingIds = new Set(skills.map(skill => skill.id));
const existingIds = new Set(skills.map((skill) => skill.id));
const aliases = {};
const used = new Set();
@@ -200,7 +494,7 @@ function buildAliases(skills) {
}
}
const tokens = skill.id.split('-').filter(Boolean);
const tokens = skill.id.split("-").filter(Boolean);
if (skill.id.length < 28 || tokens.length < 4) continue;
const deduped = [];
@@ -211,10 +505,11 @@ function buildAliases(skills) {
deduped.push(token);
}
const aliasTokens = deduped.length > 3
? [deduped[0], deduped[1], deduped[deduped.length - 1]]
: deduped;
const alias = unique(aliasTokens).join('-');
const aliasTokens =
deduped.length > 3
? [deduped[0], deduped[1], deduped[deduped.length - 1]]
: deduped;
const alias = unique(aliasTokens).join("-");
if (!alias || alias === skill.id) continue;
if (existingIds.has(alias) || used.has(alias)) continue;
@@ -241,11 +536,11 @@ function buildBundles(skills) {
for (const [bundleName, rule] of Object.entries(BUNDLE_RULES)) {
const bundleSkills = [];
const keywords = rule.keywords.map(keyword => keyword.toLowerCase());
const keywords = rule.keywords.map((keyword) => keyword.toLowerCase());
for (const skill of skills) {
const tokenSet = skillTokens.get(skill.id) || new Set();
if (keywords.some(keyword => tokenSet.has(keyword))) {
if (keywords.some((keyword) => tokenSet.has(keyword))) {
bundleSkills.push(skill.id);
}
}
@@ -256,49 +551,58 @@ function buildBundles(skills) {
};
}
const common = CURATED_COMMON.filter(skillId => skillTokens.has(skillId));
const common = CURATED_COMMON.filter((skillId) => skillTokens.has(skillId));
return { bundles, common };
}
function truncate(value, limit) {
if (!value || value.length <= limit) return value || '';
if (!value || value.length <= limit) return value || "";
return `${value.slice(0, limit - 3)}...`;
}
function renderCatalogMarkdown(catalog) {
const lines = [];
lines.push('# Skill Catalog');
lines.push('');
lines.push("# Skill Catalog");
lines.push("");
lines.push(`Generated at: ${catalog.generatedAt}`);
lines.push('');
lines.push("");
lines.push(`Total skills: ${catalog.total}`);
lines.push('');
lines.push("");
const categories = Array.from(new Set(catalog.skills.map(skill => skill.category))).sort();
const categories = Array.from(
new Set(catalog.skills.map((skill) => skill.category)),
).sort();
for (const category of categories) {
const grouped = catalog.skills.filter(skill => skill.category === category);
const grouped = catalog.skills.filter(
(skill) => skill.category === category,
);
lines.push(`## ${category} (${grouped.length})`);
lines.push('');
lines.push('| Skill | Description | Tags | Triggers |');
lines.push('| --- | --- | --- | --- |');
lines.push("");
lines.push("| Skill | Description | Tags | Triggers |");
lines.push("| --- | --- | --- | --- |");
for (const skill of grouped) {
const description = truncate(skill.description, 160).replace(/\|/g, '\\|');
const tags = skill.tags.join(', ');
const triggers = skill.triggers.join(', ');
lines.push(`| \`${skill.id}\` | ${description} | ${tags} | ${triggers} |`);
const description = truncate(skill.description, 160).replace(
/\|/g,
"\\|",
);
const tags = skill.tags.join(", ");
const triggers = skill.triggers.join(", ");
lines.push(
`| \`${skill.id}\` | ${description} | ${tags} | ${triggers} |`,
);
}
lines.push('');
lines.push("");
}
return lines.join('\n');
return lines.join("\n");
}
function buildCatalog() {
const skillRelPaths = listSkillIdsRecursive(SKILLS_DIR);
const skills = skillRelPaths.map(relPath => readSkill(SKILLS_DIR, relPath));
const skills = skillRelPaths.map((relPath) => readSkill(SKILLS_DIR, relPath));
const catalogSkills = [];
for (const skill of skills) {
@@ -318,26 +622,32 @@ function buildCatalog() {
}
const catalog = {
generatedAt: process.env.SOURCE_DATE_EPOCH
? new Date(process.env.SOURCE_DATE_EPOCH * 1000).toISOString()
: (process.env.CI ? '2026-02-08T00:00:00.000Z' : new Date().toISOString()),
generatedAt: process.env.SOURCE_DATE_EPOCH
? new Date(process.env.SOURCE_DATE_EPOCH * 1000).toISOString()
: "2026-02-08T00:00:00.000Z",
total: catalogSkills.length,
skills: catalogSkills.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0)),
skills: catalogSkills.sort((a, b) =>
a.id < b.id ? -1 : a.id > b.id ? 1 : 0,
),
};
const aliases = buildAliases(catalog.skills);
const bundleData = buildBundles(catalog.skills);
const catalogPath = path.join(ROOT, 'data', 'catalog.json');
const catalogMarkdownPath = path.join(ROOT, 'CATALOG.md');
const bundlesPath = path.join(ROOT, 'data', 'bundles.json');
const aliasesPath = path.join(ROOT, 'data', 'aliases.json');
const catalogPath = path.join(ROOT, "data", "catalog.json");
const catalogMarkdownPath = path.join(ROOT, "CATALOG.md");
const bundlesPath = path.join(ROOT, "data", "bundles.json");
const aliasesPath = path.join(ROOT, "data", "aliases.json");
fs.writeFileSync(catalogPath, JSON.stringify(catalog, null, 2));
fs.writeFileSync(catalogMarkdownPath, renderCatalogMarkdown(catalog));
fs.writeFileSync(
bundlesPath,
JSON.stringify({ generatedAt: catalog.generatedAt, ...bundleData }, null, 2),
JSON.stringify(
{ generatedAt: catalog.generatedAt, ...bundleData },
null,
2,
),
);
fs.writeFileSync(
aliasesPath,

View File

@@ -1,285 +1,293 @@
#!/usr/bin/env python3
"""
Sync Microsoft Skills Repository - v3
Preserves original structure from skills/ directory and handles all locations
Sync Microsoft Skills Repository - v4 (Flat Structure)
Reads each SKILL.md frontmatter 'name' field and uses it as a flat directory
name under skills/ to comply with the repository's indexing conventions.
"""
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
import json
from pathlib import Path
MS_REPO = "https://github.com/microsoft/skills.git"
TARGET_DIR = Path(__file__).parent.parent / "skills"
REPO_ROOT = Path(__file__).parent.parent
TARGET_DIR = REPO_ROOT / "skills"
DOCS_DIR = REPO_ROOT / "docs"
def clone_repo(temp_dir: Path):
"""Clone Microsoft skills repository"""
"""Clone Microsoft skills repository (shallow)."""
print("🔄 Cloning Microsoft Skills repository...")
subprocess.run(
["git", "clone", "--depth", "1", MS_REPO, str(temp_dir)],
check=True
check=True,
)
def find_all_skills(source_dir: Path):
"""Find all SKILL.md files in the repository"""
all_skills = {}
# Search in .github/skills/
github_skills = source_dir / ".github" / "skills"
if github_skills.exists():
for skill_dir in github_skills.iterdir():
if skill_dir.is_dir() and (skill_dir / "SKILL.md").exists():
all_skills[skill_dir.name] = skill_dir
# Search in .github/plugins/
github_plugins = source_dir / ".github" / "plugins"
if github_plugins.exists():
for skill_file in github_plugins.rglob("SKILL.md"):
skill_dir = skill_file.parent
skill_name = skill_dir.name
if skill_name not in all_skills:
all_skills[skill_name] = skill_dir
return all_skills
def sync_skills_preserve_structure(source_dir: Path, target_dir: Path):
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
content = skill_md_path.read_text(encoding="utf-8")
except Exception:
return None
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
if not fm_match:
return None
for line in fm_match.group(1).splitlines():
match = re.match(r"^name:\s*(.+)$", line)
if match:
value = match.group(1).strip().strip("\"'")
if value:
return value
return None
def generate_fallback_name(relative_path: Path) -> str:
"""
Sync skills preserving the original skills/ directory structure.
This is better than auto-categorization since MS already organized them.
Generate a fallback directory name when frontmatter 'name' is missing.
Converts a path like 'dotnet/compute/botservice' to 'ms-dotnet-compute-botservice'.
"""
parts = [p for p in relative_path.parts if p]
return "ms-" + "-".join(parts)
def find_skills_in_directory(source_dir: Path):
"""
Walk the Microsoft repo's skills/ directory (which uses symlinks)
and resolve each to its actual SKILL.md content.
Returns list of dicts with keys: relative_path, skill_md, source_dir.
"""
skills_source = source_dir / "skills"
results = []
if not skills_source.exists():
print(" ⚠️ skills/ directory not found, will use flat structure")
return sync_skills_flat(source_dir, target_dir)
# First, find all actual skill content
all_skills = find_all_skills(source_dir)
print(f" 📂 Found {len(all_skills)} total skills in repository")
synced_count = 0
skill_metadata = []
# Walk through the skills/ directory structure
return results
for item in skills_source.rglob("*"):
# Skip non-directories
if not item.is_dir():
continue
# Check if this directory (or its symlink target) contains a SKILL.md
skill_md = None
skill_source_dir = None
# If it's a symlink, resolve it
actual_dir = None
if item.is_symlink():
try:
resolved = item.resolve()
if (resolved / "SKILL.md").exists():
skill_md = resolved / "SKILL.md"
skill_source_dir = resolved
except:
actual_dir = resolved
except Exception:
continue
elif (item / "SKILL.md").exists():
skill_md = item / "SKILL.md"
skill_source_dir = item
actual_dir = item
if skill_md is None:
continue
# Get relative path from skills/ directory - this preserves MS's organization
try:
relative_path = item.relative_to(skills_source)
except ValueError:
# Shouldn't happen, but handle it
continue
# Create target directory preserving structure
target_skill_dir = target_dir / "official" / "microsoft" / relative_path
target_skill_dir.mkdir(parents=True, exist_ok=True)
# Copy SKILL.md
shutil.copy2(skill_md, target_skill_dir / "SKILL.md")
# Copy other files from the actual skill directory
for file_item in skill_source_dir.iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
# Collect metadata
skill_metadata.append({
"path": str(relative_path),
"name": item.name,
"category": str(relative_path.parent),
"source": str(skill_source_dir.relative_to(source_dir))
})
synced_count += 1
print(f" ✅ Synced: {relative_path}")
# Also sync any skills from .github/plugins that aren't symlinked in skills/
plugin_skills = find_plugin_skills(source_dir, skill_metadata)
if plugin_skills:
print(f"\n 📦 Found {len(plugin_skills)} additional plugin skills")
for plugin_skill in plugin_skills:
target_skill_dir = target_dir / "official" / "microsoft" / "plugins" / plugin_skill['name']
target_skill_dir.mkdir(parents=True, exist_ok=True)
# Copy SKILL.md
shutil.copy2(plugin_skill['source'] / "SKILL.md", target_skill_dir / "SKILL.md")
# Copy other files
for file_item in plugin_skill['source'].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"path": f"plugins/{plugin_skill['name']}",
"name": plugin_skill['name'],
"category": "plugins",
"source": str(plugin_skill['source'].relative_to(source_dir))
})
synced_count += 1
print(f" ✅ Synced: plugins/{plugin_skill['name']}")
return synced_count, skill_metadata
def find_plugin_skills(source_dir: Path, already_synced: list):
"""Find plugin skills that haven't been synced yet"""
synced_names = {s['name'] for s in already_synced}
plugin_skills = []
results.append({
"relative_path": relative_path,
"skill_md": skill_md,
"source_dir": actual_dir,
})
return results
def find_plugin_skills(source_dir: Path, already_synced_names: set):
"""Find plugin skills in .github/plugins/ that haven't been synced yet."""
results = []
github_plugins = source_dir / ".github" / "plugins"
if github_plugins.exists():
for skill_file in github_plugins.rglob("SKILL.md"):
skill_dir = skill_file.parent
skill_name = skill_dir.name
if skill_name not in synced_names:
plugin_skills.append({
'name': skill_name,
'source': skill_dir
})
return plugin_skills
if not github_plugins.exists():
return results
for skill_file in github_plugins.rglob("SKILL.md"):
skill_dir = skill_file.parent
skill_name = skill_dir.name
if skill_name not in already_synced_names:
results.append({
"relative_path": Path("plugins") / skill_name,
"skill_md": skill_file,
"source_dir": skill_dir,
})
return results
def sync_skills_flat(source_dir: Path, target_dir: Path):
"""Fallback: sync all skills in a flat structure"""
all_skills = find_all_skills(source_dir)
"""
Sync all Microsoft skills into a flat structure under skills/.
Uses frontmatter 'name' as directory name, with collision detection.
"""
all_skill_entries = find_skills_in_directory(source_dir)
print(f" 📂 Found {len(all_skill_entries)} skills in skills/ directory")
synced_count = 0
skill_metadata = []
for skill_name, skill_dir in all_skills.items():
target_skill_dir = target_dir / "official" / "microsoft" / skill_name
# name -> original relative_path (for collision logging)
used_names: dict[str, str] = {}
for entry in all_skill_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = generate_fallback_name(entry["relative_path"])
print(
f" ⚠️ No frontmatter name for {entry['relative_path']}, using fallback: {skill_name}")
# Collision detection
if skill_name in used_names:
original = used_names[skill_name]
print(
f" ⚠️ Name collision '{skill_name}': {entry['relative_path']} vs {original}")
# Append the first path component (the language) as a suffix to disambiguate
lang = entry["relative_path"].parts[0] if entry["relative_path"].parts else "unknown"
skill_name = f"{skill_name}-{lang}"
print(f" Resolved to: {skill_name}")
used_names[skill_name] = str(entry["relative_path"])
# Create flat target directory
target_skill_dir = target_dir / skill_name
target_skill_dir.mkdir(parents=True, exist_ok=True)
# Copy SKILL.md
shutil.copy2(skill_dir / "SKILL.md", target_skill_dir / "SKILL.md")
# Copy other files
for item in skill_dir.iterdir():
if item.name != "SKILL.md" and item.is_file():
shutil.copy2(item, target_skill_dir / item.name)
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
# Copy other files from the skill directory
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"path": skill_name,
"name": skill_name,
"category": "root"
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills",
})
synced_count += 1
print(f"Synced: {skill_name}")
print(f"{entry['relative_path']} → skills/{skill_name}/")
# Sync plugin skills
synced_names = set(used_names.keys())
plugin_entries = find_plugin_skills(
source_dir, {e["source_dir"].name for e in all_skill_entries})
if plugin_entries:
print(f"\n 📦 Found {len(plugin_entries)} additional plugin skills")
for entry in plugin_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = entry["source_dir"].name
if skill_name in synced_names:
skill_name = f"{skill_name}-plugin"
synced_names.add(skill_name)
target_skill_dir = target_dir / skill_name
target_skill_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills (plugin)",
})
synced_count += 1
print(f"{entry['relative_path']} → skills/{skill_name}/")
return synced_count, skill_metadata
def create_attribution_file(target_dir: Path, metadata: list):
"""Create attribution and metadata file"""
def save_attribution(metadata: list):
"""Save attribution metadata to docs/."""
DOCS_DIR.mkdir(parents=True, exist_ok=True)
attribution = {
"source": "microsoft/skills",
"repository": "https://github.com/microsoft/skills",
"license": "MIT",
"synced_skills": len(metadata),
"structure": "flat (frontmatter name as directory name)",
"skills": metadata,
"note": "Symlinks resolved and content copied for compatibility. Original directory structure preserved."
}
ms_dir = target_dir / "official" / "microsoft"
ms_dir.mkdir(parents=True, exist_ok=True)
with open(ms_dir / "ATTRIBUTION.json", "w") as f:
with open(DOCS_DIR / "microsoft-skills-attribution.json", "w") as f:
json.dump(attribution, f, indent=2)
def copy_documentation(source_dir: Path, target_dir: Path):
"""Copy LICENSE and README files"""
ms_dir = target_dir / "official" / "microsoft"
ms_dir.mkdir(parents=True, exist_ok=True)
def copy_license(source_dir: Path):
"""Copy the Microsoft LICENSE to docs/."""
DOCS_DIR.mkdir(parents=True, exist_ok=True)
if (source_dir / "LICENSE").exists():
shutil.copy2(source_dir / "LICENSE", ms_dir / "LICENSE")
if (source_dir / "README.md").exists():
shutil.copy2(source_dir / "README.md", ms_dir / "README-MICROSOFT.md")
shutil.copy2(source_dir / "LICENSE", DOCS_DIR / "LICENSE-MICROSOFT")
def main():
"""Main sync function"""
print("🚀 Microsoft Skills Sync Script v3")
print("=" * 50)
"""Main sync function."""
print("🚀 Microsoft Skills Sync Script v4 (Flat Structure)")
print("=" * 55)
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
try:
# Clone repository
clone_repo(temp_path)
# Create target directory
TARGET_DIR.mkdir(parents=True, exist_ok=True)
# Sync skills (preserving structure)
print("\n🔗 Resolving symlinks and preserving directory structure...")
count, metadata = sync_skills_preserve_structure(temp_path, TARGET_DIR)
# Copy documentation
print("\n📄 Copying documentation...")
copy_documentation(temp_path, TARGET_DIR)
# Create attribution file
print("📝 Creating attribution metadata...")
create_attribution_file(TARGET_DIR, metadata)
print(f"\n✨ Success! Synced {count} Microsoft skills")
print(f"📁 Location: {TARGET_DIR / 'official' / 'microsoft'}")
# Show structure summary
ms_dir = TARGET_DIR / "official" / "microsoft"
categories = set()
print("\n🔗 Resolving symlinks and flattening into skills/<name>/...")
count, metadata = sync_skills_flat(temp_path, TARGET_DIR)
print("\n📄 Saving attribution...")
save_attribution(metadata)
copy_license(temp_path)
print(
f"\n✨ Success! Synced {count} Microsoft skills (flat structure)")
print(f"📁 Location: {TARGET_DIR}/")
# Show summary of languages
languages = set()
for skill in metadata:
cat = skill.get('category', 'root')
if cat != 'root':
categories.add(cat.split('/')[0] if '/' in cat else cat)
parts = skill["original_path"].split("/")
if len(parts) >= 1 and parts[0] != "plugins":
languages.add(parts[0])
print(f"\n📊 Organization:")
print(f" Total skills: {count}")
print(f" Categories: {', '.join(sorted(categories)[:10])}")
if len(categories) > 10:
print(f" ... and {len(categories) - 10} more")
print(f" Languages: {', '.join(sorted(languages))}")
print("\n📋 Next steps:")
print("1. Review synced skills")
print("2. Run: npm run validate")
print("3. Update CATALOG.md")
print("4. Update docs/SOURCES.md")
print("5. Commit changes and create PR")
print("1. Delete old skills/official/ directory (if it exists)")
print("2. Run: npm run build")
print("3. Commit changes and create PR")
except Exception as e:
print(f"\n❌ Error: {e}")
import traceback
traceback.print_exc()
return 1
return 0
if __name__ == "__main__":
exit(main())
exit(main())

View File

@@ -1,149 +1,98 @@
#!/usr/bin/env python3
"""
Debug script to inspect Microsoft Skills repository structure - v2
Handles all skill locations including plugins
Inspect Microsoft Skills Repository Structure
Shows the repository layout, skill locations, and what flat names would be generated.
"""
import re
import subprocess
import tempfile
from pathlib import Path
MS_REPO = "https://github.com/microsoft/skills.git"
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
content = skill_md_path.read_text(encoding="utf-8")
except Exception:
return None
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
if not fm_match:
return None
for line in fm_match.group(1).splitlines():
match = re.match(r"^name:\s*(.+)$", line)
if match:
value = match.group(1).strip().strip("\"'")
if value:
return value
return None
def inspect_repo():
    """Inspect the Microsoft skills repository structure.

    Shallow-clones MS_REPO into a temporary directory, finds every SKILL.md
    under the clone, prints the flat name each one maps to (its frontmatter
    'name', or an "ms-..." fallback built from its directory path), and then
    reports any flat name shared by more than one skill. All output goes to
    stdout; nothing is returned.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero
            (the call uses ``check=True``).
    """
    print("🔍 Inspecting Microsoft Skills Repository Structure")
    print("=" * 60)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)

        print("\n1⃣ Cloning repository...")
        subprocess.run(
            ["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
            check=True,
            capture_output=True,
        )

        # Find all SKILL.md files
        all_skill_mds = list(temp_path.rglob("SKILL.md"))
        print(f"\n2 Total SKILL.md files found: {len(all_skill_mds)}")

        # Show flat name mapping
        print("\n3⃣ Flat Name Mapping (frontmatter 'name' → directory name):")
        print("-" * 60)

        # flat name -> every skill directory (relative to the clone) that maps to it
        names_seen: dict[str, list[str]] = {}

        for skill_md in sorted(all_skill_mds, key=str):
            try:
                rel = skill_md.parent.relative_to(temp_path)
            except ValueError:
                rel = skill_md.parent

            name = extract_skill_name(skill_md)
            # Fallback for skills without a frontmatter name: join the
            # directory's path components, dropping the first one.
            fallback_name = f"ms-{'-'.join(rel.parts[1:])}"
            display_name = name if name else f"(no name → {fallback_name})"
            print(f" {rel}{display_name}")

            effective_name = name if name else fallback_name
            names_seen.setdefault(effective_name, []).append(str(rel))

        # Collision check
        collisions = {n: paths for n, paths in names_seen.items()
                      if len(paths) > 1}
        if collisions:
            print(f"\n4️⃣ ⚠️ Name Collisions Detected ({len(collisions)}):")
            for name, paths in collisions.items():
                print(f" '{name}':")
                for p in paths:
                    print(f" - {p}")
        else:
            print(
                f"\n4⃣ ✅ No name collisions — all {len(names_seen)} names are unique!")

    print("\n✨ Inspection complete!")
if __name__ == "__main__":
    try:
        inspect_repo()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
        # Report the failure to the caller (shell/CI) instead of exiting 0.
        raise SystemExit(1)

View File

@@ -1,9 +1,10 @@
#!/usr/bin/env python3
"""
Test Script: Verify Microsoft Skills Sync Coverage
Tests all possible skill locations and structures
Test Script: Verify Microsoft Skills Sync Coverage and Flat Name Uniqueness
Ensures all skills are captured and no directory name collisions exist.
"""
import re
import subprocess
import tempfile
from pathlib import Path
@@ -11,204 +12,177 @@ from collections import defaultdict
MS_REPO = "https://github.com/microsoft/skills.git"
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
content = skill_md_path.read_text(encoding="utf-8")
except Exception:
return None
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
if not fm_match:
return None
for line in fm_match.group(1).splitlines():
match = re.match(r"^name:\s*(.+)$", line)
if match:
value = match.group(1).strip().strip("\"'")
if value:
return value
return None
def analyze_skill_locations():
    """
    Comprehensive analysis of all skill locations in Microsoft repo.
    Verifies flat name uniqueness and coverage.

    Shallow-clones MS_REPO into a temporary directory, finds every SKILL.md
    under the clone, prints a per-location count, derives a flat name for each
    skill (frontmatter 'name', or an "ms-..." fallback built from its path),
    and checks those names for collisions and for characters outside
    ``[a-zA-Z0-9._-]``.

    Returns:
        dict with integer counts under "total", "unique", "collisions",
        "missing_names" and "invalid_names", plus the bool "passed", which is
        True when there are no collisions and no invalid names. Missing
        frontmatter names are reported but do not affect "passed".

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits non-zero
            (the call uses ``check=True``).
    """
    print("🔬 Comprehensive Skill Coverage & Uniqueness Analysis")
    print("=" * 60)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)

        def _relative(path):
            # rglob() results live under temp_path; the fallback is defensive.
            try:
                return path.relative_to(temp_path)
            except ValueError:
                return path

        print("\n1⃣ Cloning repository...")
        subprocess.run(
            ["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
            check=True,
            capture_output=True,
        )

        # Find ALL SKILL.md files
        all_skill_files = list(temp_path.rglob("SKILL.md"))
        print(f"\n2⃣ Total SKILL.md files found: {len(all_skill_files)}")

        # Categorize by location
        location_types = defaultdict(list)
        for skill_file in all_skill_files:
            # Match against the clone-relative path in POSIX form (with a
            # leading "/"), so neither the temp directory's own path nor the
            # OS path separator can influence which bucket a skill lands in.
            path_str = "/" + _relative(skill_file).as_posix()
            if ".github/skills" in path_str:
                location_types["github_skills"].append(skill_file)
            elif ".github/plugins" in path_str:
                location_types["github_plugins"].append(skill_file)
            elif "/skills/" in path_str:
                location_types["skills_dir"].append(skill_file)
            else:
                location_types["other"].append(skill_file)

        # Display results
        print("\n3⃣ Skills by Location Type:")
        print("-" * 60)
        for loc_type, files in sorted(location_types.items()):
            print(f" 📍 {loc_type}: {len(files)} skills")

        # Flat name uniqueness check
        print("\n4⃣ Flat Name Uniqueness Check:")
        print("-" * 60)

        # flat name -> every skill directory (relative to the clone) that maps to it
        name_map: dict[str, list[str]] = {}
        missing_names = []

        for skill_file in all_skill_files:
            rel = _relative(skill_file.parent)

            name = extract_skill_name(skill_file)
            if not name:
                missing_names.append(str(rel))
                # Generate fallback from the path, ignoring container directories
                parts = [p for p in rel.parts if p not in (
                    ".github", "skills", "plugins")]
                name = "ms-" + "-".join(parts) if parts else str(rel)

            name_map.setdefault(name, []).append(str(rel))

        # Report results
        collisions = {n: paths for n, paths in name_map.items()
                      if len(paths) > 1}
        unique_names = {n: paths for n, paths in name_map.items()
                        if len(paths) == 1}

        print(f"\n ✅ Unique names: {len(unique_names)}")

        if missing_names:
            print(
                f"\n ⚠️ Skills missing frontmatter 'name' ({len(missing_names)}):")
            for path in missing_names[:5]:
                print(f" - {path}")
            if len(missing_names) > 5:
                print(f" ... and {len(missing_names) - 5} more")

        if collisions:
            print(f"\n ❌ Name collisions ({len(collisions)}):")
            for name, paths in collisions.items():
                print(f" '{name}':")
                for p in paths:
                    print(f" - {p}")
        else:
            print("\n ✅ No collisions detected!")

        # Validate all names are valid directory names
        print("\n5⃣ Directory Name Validation:")
        invalid_names = [
            name for name in name_map
            if not re.match(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$", name)
        ]

        if invalid_names:
            print(f" ❌ Invalid directory names ({len(invalid_names)}):")
            for name in invalid_names[:5]:
                print(f" - '{name}'")
        else:
            print(f" ✅ All {len(name_map)} names are valid directory names!")

        # Summary
        print("\n6️⃣ Summary:")
        print("-" * 60)
        total = len(all_skill_files)

        print(f" Total SKILL.md files: {total}")
        print(f" Unique flat names: {len(unique_names)}")
        print(f" Collisions: {len(collisions)}")
        print(f" Missing names: {len(missing_names)}")

        is_pass = len(collisions) == 0 and len(invalid_names) == 0
        if is_pass:
            print("\nALL CHECKS PASSED")
        else:
            print("\n ⚠️ SOME CHECKS NEED ATTENTION")

    print("\n✨ Analysis complete!")

    return {
        "total": total,
        "unique": len(unique_names),
        "collisions": len(collisions),
        "missing_names": len(missing_names),
        "invalid_names": len(invalid_names),
        "passed": is_pass,
    }
if __name__ == "__main__":
try:
results = analyze_skill_locations()
print("\n" + "=" * 60)
print("FINAL VERDICT")
print("=" * 60)
coverage_pct = (results['handled'] / results['total'] * 100) if results['total'] > 0 else 0
print(f"\nCoverage: {coverage_pct:.1f}%")
print(f"Skills handled: {results['handled']}/{results['total']}")
if coverage_pct >= 99:
print("\n✅ V3 SCRIPT IS COMPREHENSIVE")
print(" All skill locations are properly handled!")
if results["passed"]:
print("\n✅ V4 FLAT STRUCTURE IS VALID")
print(" All names are unique and valid directory names!")
else:
print("\n⚠️ V3 SCRIPT MAY NEED ENHANCEMENT")
print(" Some edge cases might be missed")
print("\n⚠️ V4 FLAT STRUCTURE NEEDS FIXES")
if results["collisions"] > 0:
print(f" {results['collisions']} name collisions to resolve")
if results["invalid_names"] > 0:
print(f" {results['invalid_names']} invalid directory names")
except Exception as e:
print(f"\n❌ Error: {e}")
import traceback