refactor: flatten Microsoft skills from nested to flat directory structure

Rewrite sync_microsoft_skills.py (v4) to use each SKILL.md's frontmatter 'name' field as the flat directory name under skills/, replacing the nested skills/official/microsoft/<lang>/<category>/<service>/ hierarchy. This fixes CI failures caused by the indexing, validation, and catalog scripts, which expect skills/<id>/SKILL.md (depth 1).

Changes:
- Rewrite scripts/sync_microsoft_skills.py for flat output with collision detection
- Update scripts/tests/inspect_microsoft_repo.py for flat name mapping
- Update scripts/tests/test_comprehensive_coverage.py for name uniqueness checks
- Delete the nested skills/official/ directory
- Add 129 Microsoft skills as flat directories (e.g. skills/azure-mgmt-botservice-dotnet/)
- Move attribution files to docs/ (LICENSE-MICROSOFT, microsoft-skills-attribution.json)
- Rebuild skills_index.json, CATALOG.md, README.md (845 total skills)
2026-02-12 00:07:15 +05:00
parent e06454dafd
commit e7ae616385
142 changed files with 5683 additions and 6097 deletions
--- a/scripts/build-catalog.js
+++ b/scripts/build-catalog.js
@@ -1,161 +1,454 @@
-const fs = require('fs');
-const path = require('path');
+const fs = require("fs");
+const path = require("path");
 const {
  listSkillIdsRecursive,
  readSkill,
  tokenize,
  unique,
-} = require('../lib/skill-utils');
+} = require("../lib/skill-utils");

-const ROOT = path.resolve(__dirname, '..');
-const SKILLS_DIR = path.join(ROOT, 'skills');
+const ROOT = path.resolve(__dirname, "..");
+const SKILLS_DIR = path.join(ROOT, "skills");

 const STOPWORDS = new Set([
-  'a', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'for', 'from', 'has', 'have', 'in', 'into',
-  'is', 'it', 'its', 'of', 'on', 'or', 'our', 'out', 'over', 'that', 'the', 'their', 'they', 'this',
-  'to', 'use', 'when', 'with', 'you', 'your', 'will', 'can', 'if', 'not', 'only', 'also', 'more',
-  'best', 'practice', 'practices', 'expert', 'specialist', 'focused', 'focus', 'master', 'modern',
-  'advanced', 'comprehensive', 'production', 'production-ready', 'ready', 'build', 'create', 'deliver',
-  'design', 'implement', 'implementation', 'strategy', 'strategies', 'patterns', 'pattern', 'workflow',
-  'workflows', 'guide', 'template', 'templates', 'tool', 'tools', 'project', 'projects', 'support',
-  'manage', 'management', 'system', 'systems', 'services', 'service', 'across', 'end', 'end-to-end',
-  'using', 'based', 'ensure', 'ensure', 'help', 'needs', 'need', 'focuses', 'handles', 'builds', 'make',
+  "a",
+  "an",
+  "and",
+  "are",
+  "as",
+  "at",
+  "be",
+  "but",
+  "by",
+  "for",
+  "from",
+  "has",
+  "have",
+  "in",
+  "into",
+  "is",
+  "it",
+  "its",
+  "of",
+  "on",
+  "or",
+  "our",
+  "out",
+  "over",
+  "that",
+  "the",
+  "their",
+  "they",
+  "this",
+  "to",
+  "use",
+  "when",
+  "with",
+  "you",
+  "your",
+  "will",
+  "can",
+  "if",
+  "not",
+  "only",
+  "also",
+  "more",
+  "best",
+  "practice",
+  "practices",
+  "expert",
+  "specialist",
+  "focused",
+  "focus",
+  "master",
+  "modern",
+  "advanced",
+  "comprehensive",
+  "production",
+  "production-ready",
+  "ready",
+  "build",
+  "create",
+  "deliver",
+  "design",
+  "implement",
+  "implementation",
+  "strategy",
+  "strategies",
+  "patterns",
+  "pattern",
+  "workflow",
+  "workflows",
+  "guide",
+  "template",
+  "templates",
+  "tool",
+  "tools",
+  "project",
+  "projects",
+  "support",
+  "manage",
+  "management",
+  "system",
+  "systems",
+  "services",
+  "service",
+  "across",
+  "end",
+  "end-to-end",
+  "using",
+  "based",
+  "ensure",
+  "ensure",
+  "help",
+  "needs",
+  "need",
+  "focuses",
+  "handles",
+  "builds",
+  "make",
 ]);

 const TAG_STOPWORDS = new Set([
-  'pro', 'expert', 'patterns', 'pattern', 'workflow', 'workflows', 'templates', 'template', 'toolkit',
-  'tools', 'tool', 'project', 'projects', 'guide', 'management', 'engineer', 'architect', 'developer',
-  'specialist', 'assistant', 'analysis', 'review', 'reviewer', 'automation', 'orchestration', 'scaffold',
-  'scaffolding', 'implementation', 'strategy', 'context', 'management', 'feature', 'features', 'smart',
-  'system', 'systems', 'design', 'development', 'development', 'test', 'testing', 'workflow',
+  "pro",
+  "expert",
+  "patterns",
+  "pattern",
+  "workflow",
+  "workflows",
+  "templates",
+  "template",
+  "toolkit",
+  "tools",
+  "tool",
+  "project",
+  "projects",
+  "guide",
+  "management",
+  "engineer",
+  "architect",
+  "developer",
+  "specialist",
+  "assistant",
+  "analysis",
+  "review",
+  "reviewer",
+  "automation",
+  "orchestration",
+  "scaffold",
+  "scaffolding",
+  "implementation",
+  "strategy",
+  "context",
+  "management",
+  "feature",
+  "features",
+  "smart",
+  "system",
+  "systems",
+  "design",
+  "development",
+  "development",
+  "test",
+  "testing",
+  "workflow",
 ]);

 const CATEGORY_RULES = [
  {
-    name: 'security',
+    name: "security",
    keywords: [
-      'security', 'sast', 'compliance', 'privacy', 'threat', 'vulnerability', 'owasp', 'pci', 'gdpr',
-      'secrets', 'risk', 'malware', 'forensics', 'attack', 'incident', 'auth', 'mtls', 'zero', 'trust',
+      "security",
+      "sast",
+      "compliance",
+      "privacy",
+      "threat",
+      "vulnerability",
+      "owasp",
+      "pci",
+      "gdpr",
+      "secrets",
+      "risk",
+      "malware",
+      "forensics",
+      "attack",
+      "incident",
+      "auth",
+      "mtls",
+      "zero",
+      "trust",
    ],
  },
  {
-    name: 'infrastructure',
+    name: "infrastructure",
    keywords: [
-      'kubernetes', 'k8s', 'helm', 'terraform', 'cloud', 'network', 'devops', 'gitops', 'prometheus',
-      'grafana', 'observability', 'monitoring', 'logging', 'tracing', 'deployment', 'istio', 'linkerd',
-      'service', 'mesh', 'slo', 'sre', 'oncall', 'incident', 'pipeline', 'cicd', 'ci', 'cd', 'kafka',
+      "kubernetes",
+      "k8s",
+      "helm",
+      "terraform",
+      "cloud",
+      "network",
+      "devops",
+      "gitops",
+      "prometheus",
+      "grafana",
+      "observability",
+      "monitoring",
+      "logging",
+      "tracing",
+      "deployment",
+      "istio",
+      "linkerd",
+      "service",
+      "mesh",
+      "slo",
+      "sre",
+      "oncall",
+      "incident",
+      "pipeline",
+      "cicd",
+      "ci",
+      "cd",
+      "kafka",
    ],
  },
  {
-    name: 'data-ai',
+    name: "data-ai",
    keywords: [
-      'data', 'database', 'db', 'sql', 'postgres', 'mysql', 'analytics', 'etl', 'warehouse', 'dbt',
-      'ml', 'ai', 'llm', 'rag', 'vector', 'embedding', 'spark', 'airflow', 'cdc', 'pipeline',
+      "data",
+      "database",
+      "db",
+      "sql",
+      "postgres",
+      "mysql",
+      "analytics",
+      "etl",
+      "warehouse",
+      "dbt",
+      "ml",
+      "ai",
+      "llm",
+      "rag",
+      "vector",
+      "embedding",
+      "spark",
+      "airflow",
+      "cdc",
+      "pipeline",
    ],
  },
  {
-    name: 'development',
+    name: "development",
    keywords: [
-      'python', 'javascript', 'typescript', 'java', 'golang', 'go', 'rust', 'csharp', 'dotnet', 'php',
-      'ruby', 'node', 'react', 'frontend', 'backend', 'mobile', 'ios', 'android', 'flutter', 'fastapi',
-      'django', 'nextjs', 'vue', 'api',
+      "python",
+      "javascript",
+      "typescript",
+      "java",
+      "golang",
+      "go",
+      "rust",
+      "csharp",
+      "dotnet",
+      "php",
+      "ruby",
+      "node",
+      "react",
+      "frontend",
+      "backend",
+      "mobile",
+      "ios",
+      "android",
+      "flutter",
+      "fastapi",
+      "django",
+      "nextjs",
+      "vue",
+      "api",
    ],
  },
  {
-    name: 'architecture',
+    name: "architecture",
    keywords: [
-      'architecture', 'c4', 'microservices', 'event', 'cqrs', 'saga', 'domain', 'ddd', 'patterns',
-      'decision', 'adr',
+      "architecture",
+      "c4",
+      "microservices",
+      "event",
+      "cqrs",
+      "saga",
+      "domain",
+      "ddd",
+      "patterns",
+      "decision",
+      "adr",
    ],
  },
  {
-    name: 'testing',
-    keywords: ['testing', 'tdd', 'unit', 'e2e', 'qa', 'test'],
+    name: "testing",
+    keywords: ["testing", "tdd", "unit", "e2e", "qa", "test"],
  },
  {
-    name: 'business',
+    name: "business",
    keywords: [
-      'business', 'market', 'sales', 'finance', 'startup', 'legal', 'hr', 'product', 'customer', 'seo',
-      'marketing', 'kpi', 'contract', 'employment',
+      "business",
+      "market",
+      "sales",
+      "finance",
+      "startup",
+      "legal",
+      "hr",
+      "product",
+      "customer",
+      "seo",
+      "marketing",
+      "kpi",
+      "contract",
+      "employment",
    ],
  },
  {
-    name: 'workflow',
-    keywords: ['workflow', 'orchestration', 'conductor', 'automation', 'process', 'collaboration'],
+    name: "workflow",
+    keywords: [
+      "workflow",
+      "orchestration",
+      "conductor",
+      "automation",
+      "process",
+      "collaboration",
+    ],
  },
 ];

 const BUNDLE_RULES = {
-  'core-dev': {
-    description: 'Core development skills across languages, frameworks, and backend/frontend fundamentals.',
+  "core-dev": {
+    description:
+      "Core development skills across languages, frameworks, and backend/frontend fundamentals.",
    keywords: [
-      'python', 'javascript', 'typescript', 'go', 'golang', 'rust', 'java', 'node', 'frontend', 'backend',
-      'react', 'fastapi', 'django', 'nextjs', 'api', 'mobile', 'ios', 'android', 'flutter', 'php', 'ruby',
+      "python",
+      "javascript",
+      "typescript",
+      "go",
+      "golang",
+      "rust",
+      "java",
+      "node",
+      "frontend",
+      "backend",
+      "react",
+      "fastapi",
+      "django",
+      "nextjs",
+      "api",
+      "mobile",
+      "ios",
+      "android",
+      "flutter",
+      "php",
+      "ruby",
    ],
  },
-  'security-core': {
-    description: 'Security, privacy, and compliance essentials.',
+  "security-core": {
+    description: "Security, privacy, and compliance essentials.",
    keywords: [
-      'security', 'sast', 'compliance', 'threat', 'risk', 'privacy', 'secrets', 'owasp', 'gdpr', 'pci',
-      'vulnerability', 'auth',
+      "security",
+      "sast",
+      "compliance",
+      "threat",
+      "risk",
+      "privacy",
+      "secrets",
+      "owasp",
+      "gdpr",
+      "pci",
+      "vulnerability",
+      "auth",
    ],
  },
-  'k8s-core': {
-    description: 'Kubernetes and service mesh essentials.',
-    keywords: ['kubernetes', 'k8s', 'helm', 'istio', 'linkerd', 'service', 'mesh'],
-  },
-  'data-core': {
-    description: 'Data engineering and analytics foundations.',
+  "k8s-core": {
+    description: "Kubernetes and service mesh essentials.",
    keywords: [
-      'data', 'database', 'sql', 'dbt', 'airflow', 'spark', 'analytics', 'etl', 'warehouse', 'postgres',
-      'mysql', 'kafka',
+      "kubernetes",
+      "k8s",
+      "helm",
+      "istio",
+      "linkerd",
+      "service",
+      "mesh",
    ],
  },
-  'ops-core': {
-    description: 'Operations, observability, and delivery pipelines.',
+  "data-core": {
+    description: "Data engineering and analytics foundations.",
    keywords: [
-      'observability', 'monitoring', 'logging', 'tracing', 'prometheus', 'grafana', 'devops', 'gitops',
-      'deployment', 'cicd', 'pipeline', 'slo', 'sre', 'incident',
+      "data",
+      "database",
+      "sql",
+      "dbt",
+      "airflow",
+      "spark",
+      "analytics",
+      "etl",
+      "warehouse",
+      "postgres",
+      "mysql",
+      "kafka",
+    ],
+  },
+  "ops-core": {
+    description: "Operations, observability, and delivery pipelines.",
+    keywords: [
+      "observability",
+      "monitoring",
+      "logging",
+      "tracing",
+      "prometheus",
+      "grafana",
+      "devops",
+      "gitops",
+      "deployment",
+      "cicd",
+      "pipeline",
+      "slo",
+      "sre",
+      "incident",
    ],
  },
 };

 const CURATED_COMMON = [
-  'bash-pro',
-  'python-pro',
-  'javascript-pro',
-  'typescript-pro',
-  'golang-pro',
-  'rust-pro',
-  'java-pro',
-  'frontend-developer',
-  'backend-architect',
-  'nodejs-backend-patterns',
-  'fastapi-pro',
-  'api-design-principles',
-  'sql-pro',
-  'database-architect',
-  'kubernetes-architect',
-  'terraform-specialist',
-  'observability-engineer',
-  'security-auditor',
-  'sast-configuration',
-  'gitops-workflow',
+  "bash-pro",
+  "python-pro",
+  "javascript-pro",
+  "typescript-pro",
+  "golang-pro",
+  "rust-pro",
+  "java-pro",
+  "frontend-developer",
+  "backend-architect",
+  "nodejs-backend-patterns",
+  "fastapi-pro",
+  "api-design-principles",
+  "sql-pro",
+  "database-architect",
+  "kubernetes-architect",
+  "terraform-specialist",
+  "observability-engineer",
+  "security-auditor",
+  "sast-configuration",
+  "gitops-workflow",
 ];

 function normalizeTokens(tokens) {
-  return unique(tokens.map(token => token.toLowerCase())).filter(Boolean);
+  return unique(tokens.map((token) => token.toLowerCase())).filter(Boolean);
 }

 function deriveTags(skill) {
  let tags = Array.isArray(skill.tags) ? skill.tags : [];
-  tags = tags.map(tag => tag.toLowerCase()).filter(Boolean);
+  tags = tags.map((tag) => tag.toLowerCase()).filter(Boolean);

  if (!tags.length) {
    tags = skill.id
-      .split('-')
-      .map(tag => tag.toLowerCase())
-      .filter(tag => tag && !TAG_STOPWORDS.has(tag));
+      .split("-")
+      .map((tag) => tag.toLowerCase())
+      .filter((tag) => tag && !TAG_STOPWORDS.has(tag));
  }

  return normalizeTokens(tags);
@@ -177,17 +470,18 @@ function detectCategory(skill, tags) {
    }
  }

-  return 'general';
+  return "general";
 }

 function buildTriggers(skill, tags) {
-  const tokens = tokenize(`${skill.name} ${skill.description}`)
-    .filter(token => token.length >= 2 && !STOPWORDS.has(token));
+  const tokens = tokenize(`${skill.name} ${skill.description}`).filter(
+    (token) => token.length >= 2 && !STOPWORDS.has(token),
+  );
  return unique([...tags, ...tokens]).slice(0, 12);
 }

 function buildAliases(skills) {
-  const existingIds = new Set(skills.map(skill => skill.id));
+  const existingIds = new Set(skills.map((skill) => skill.id));
  const aliases = {};
  const used = new Set();

@@ -200,7 +494,7 @@ function buildAliases(skills) {
      }
    }

-    const tokens = skill.id.split('-').filter(Boolean);
+    const tokens = skill.id.split("-").filter(Boolean);
    if (skill.id.length < 28 || tokens.length < 4) continue;

    const deduped = [];
@@ -211,10 +505,11 @@ function buildAliases(skills) {
      deduped.push(token);
    }

-    const aliasTokens = deduped.length > 3
-      ? [deduped[0], deduped[1], deduped[deduped.length - 1]]
-      : deduped;
-    const alias = unique(aliasTokens).join('-');
+    const aliasTokens =
+      deduped.length > 3
+        ? [deduped[0], deduped[1], deduped[deduped.length - 1]]
+        : deduped;
+    const alias = unique(aliasTokens).join("-");

    if (!alias || alias === skill.id) continue;
    if (existingIds.has(alias) || used.has(alias)) continue;
@@ -241,11 +536,11 @@ function buildBundles(skills) {

  for (const [bundleName, rule] of Object.entries(BUNDLE_RULES)) {
    const bundleSkills = [];
-    const keywords = rule.keywords.map(keyword => keyword.toLowerCase());
+    const keywords = rule.keywords.map((keyword) => keyword.toLowerCase());

    for (const skill of skills) {
      const tokenSet = skillTokens.get(skill.id) || new Set();
-      if (keywords.some(keyword => tokenSet.has(keyword))) {
+      if (keywords.some((keyword) => tokenSet.has(keyword))) {
        bundleSkills.push(skill.id);
      }
    }
@@ -256,49 +551,58 @@ function buildBundles(skills) {
    };
  }

-  const common = CURATED_COMMON.filter(skillId => skillTokens.has(skillId));
+  const common = CURATED_COMMON.filter((skillId) => skillTokens.has(skillId));

  return { bundles, common };
 }

 function truncate(value, limit) {
-  if (!value || value.length <= limit) return value || '';
+  if (!value || value.length <= limit) return value || "";
  return `${value.slice(0, limit - 3)}...`;
 }

 function renderCatalogMarkdown(catalog) {
  const lines = [];
-  lines.push('# Skill Catalog');
-  lines.push('');
+  lines.push("# Skill Catalog");
+  lines.push("");
  lines.push(`Generated at: ${catalog.generatedAt}`);
-  lines.push('');
+  lines.push("");
  lines.push(`Total skills: ${catalog.total}`);
-  lines.push('');
+  lines.push("");

-  const categories = Array.from(new Set(catalog.skills.map(skill => skill.category))).sort();
+  const categories = Array.from(
+    new Set(catalog.skills.map((skill) => skill.category)),
+  ).sort();
  for (const category of categories) {
-    const grouped = catalog.skills.filter(skill => skill.category === category);
+    const grouped = catalog.skills.filter(
+      (skill) => skill.category === category,
+    );
    lines.push(`## ${category} (${grouped.length})`);
-    lines.push('');
-    lines.push('| Skill | Description | Tags | Triggers |');
-    lines.push('| --- | --- | --- | --- |');
+    lines.push("");
+    lines.push("| Skill | Description | Tags | Triggers |");
+    lines.push("| --- | --- | --- | --- |");

    for (const skill of grouped) {
-      const description = truncate(skill.description, 160).replace(/\|/g, '\\|');
-      const tags = skill.tags.join(', ');
-      const triggers = skill.triggers.join(', ');
-      lines.push(`| \`${skill.id}\` | ${description} | ${tags} | ${triggers} |`);
+      const description = truncate(skill.description, 160).replace(
+        /\|/g,
+        "\\|",
+      );
+      const tags = skill.tags.join(", ");
+      const triggers = skill.triggers.join(", ");
+      lines.push(
+        `| \`${skill.id}\` | ${description} | ${tags} | ${triggers} |`,
+      );
    }

-    lines.push('');
+    lines.push("");
  }

-  return lines.join('\n');
+  return lines.join("\n");
 }

 function buildCatalog() {
  const skillRelPaths = listSkillIdsRecursive(SKILLS_DIR);
-  const skills = skillRelPaths.map(relPath => readSkill(SKILLS_DIR, relPath));
+  const skills = skillRelPaths.map((relPath) => readSkill(SKILLS_DIR, relPath));
  const catalogSkills = [];

  for (const skill of skills) {
@@ -318,26 +622,32 @@ function buildCatalog() {
  }

  const catalog = {
-    generatedAt: process.env.SOURCE_DATE_EPOCH 
-      ? new Date(process.env.SOURCE_DATE_EPOCH * 1000).toISOString() 
-      : (process.env.CI ? '2026-02-08T00:00:00.000Z' : new Date().toISOString()),
+    generatedAt: process.env.SOURCE_DATE_EPOCH
+      ? new Date(process.env.SOURCE_DATE_EPOCH * 1000).toISOString()
+      : "2026-02-08T00:00:00.000Z",
    total: catalogSkills.length,
-    skills: catalogSkills.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0)),
+    skills: catalogSkills.sort((a, b) =>
+      a.id < b.id ? -1 : a.id > b.id ? 1 : 0,
+    ),
  };

  const aliases = buildAliases(catalog.skills);
  const bundleData = buildBundles(catalog.skills);

-  const catalogPath = path.join(ROOT, 'data', 'catalog.json');
-  const catalogMarkdownPath = path.join(ROOT, 'CATALOG.md');
-  const bundlesPath = path.join(ROOT, 'data', 'bundles.json');
-  const aliasesPath = path.join(ROOT, 'data', 'aliases.json');
+  const catalogPath = path.join(ROOT, "data", "catalog.json");
+  const catalogMarkdownPath = path.join(ROOT, "CATALOG.md");
+  const bundlesPath = path.join(ROOT, "data", "bundles.json");
+  const aliasesPath = path.join(ROOT, "data", "aliases.json");

  fs.writeFileSync(catalogPath, JSON.stringify(catalog, null, 2));
  fs.writeFileSync(catalogMarkdownPath, renderCatalogMarkdown(catalog));
  fs.writeFileSync(
    bundlesPath,
-    JSON.stringify({ generatedAt: catalog.generatedAt, ...bundleData }, null, 2),
+    JSON.stringify(
+      { generatedAt: catalog.generatedAt, ...bundleData },
+      null,
+      2,
+    ),
  );
  fs.writeFileSync(
    aliasesPath,
--- a/scripts/sync_microsoft_skills.py
+++ b/scripts/sync_microsoft_skills.py
@@ -1,285 +1,293 @@
 #!/usr/bin/env python3
 """
-Sync Microsoft Skills Repository - v3
-Preserves original structure from skills/ directory and handles all locations
+Sync Microsoft Skills Repository - v4 (Flat Structure)
+Reads each SKILL.md frontmatter 'name' field and uses it as a flat directory
+name under skills/ to comply with the repository's indexing conventions.
 """

+import re
 import shutil
 import subprocess
 import tempfile
-from pathlib import Path
 import json
+from pathlib import Path

 MS_REPO = "https://github.com/microsoft/skills.git"
-TARGET_DIR = Path(__file__).parent.parent / "skills"
+REPO_ROOT = Path(__file__).parent.parent
+TARGET_DIR = REPO_ROOT / "skills"
+DOCS_DIR = REPO_ROOT / "docs"
+

 def clone_repo(temp_dir: Path):
-    """Clone Microsoft skills repository"""
+    """Clone Microsoft skills repository (shallow)."""
    print("🔄 Cloning Microsoft Skills repository...")
    subprocess.run(
        ["git", "clone", "--depth", "1", MS_REPO, str(temp_dir)],
-        check=True
+        check=True,
    )

-def find_all_skills(source_dir: Path):
-    """Find all SKILL.md files in the repository"""
-    all_skills = {}
-    
-    # Search in .github/skills/
-    github_skills = source_dir / ".github" / "skills"
-    if github_skills.exists():
-        for skill_dir in github_skills.iterdir():
-            if skill_dir.is_dir() and (skill_dir / "SKILL.md").exists():
-                all_skills[skill_dir.name] = skill_dir
-    
-    # Search in .github/plugins/
-    github_plugins = source_dir / ".github" / "plugins"
-    if github_plugins.exists():
-        for skill_file in github_plugins.rglob("SKILL.md"):
-            skill_dir = skill_file.parent
-            skill_name = skill_dir.name
-            if skill_name not in all_skills:
-                all_skills[skill_name] = skill_dir
-    
-    return all_skills

-def sync_skills_preserve_structure(source_dir: Path, target_dir: Path):
+def extract_skill_name(skill_md_path: Path) -> str | None:
+    """Extract the 'name' field from SKILL.md YAML frontmatter."""
+    try:
+        content = skill_md_path.read_text(encoding="utf-8")
+    except Exception:
+        return None
+
+    fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
+    if not fm_match:
+        return None
+
+    for line in fm_match.group(1).splitlines():
+        match = re.match(r"^name:\s*(.+)$", line)
+        if match:
+            value = match.group(1).strip().strip("\"'")
+            if value:
+                return value
+    return None
+
+
+def generate_fallback_name(relative_path: Path) -> str:
    """
-    Sync skills preserving the original skills/ directory structure.
-    This is better than auto-categorization since MS already organized them.
+    Generate a fallback directory name when frontmatter 'name' is missing.
+    Converts a path like 'dotnet/compute/botservice' to 'ms-dotnet-compute-botservice'.
+    """
+    parts = [p for p in relative_path.parts if p]
+    return "ms-" + "-".join(parts)
+
+
+def find_skills_in_directory(source_dir: Path):
+    """
+    Walk the Microsoft repo's skills/ directory (which uses symlinks)
+    and resolve each to its actual SKILL.md content.
+    Returns list of dicts: {relative_path, skill_md, source_dir}.
    """
    skills_source = source_dir / "skills"
-    
+    results = []
+
    if not skills_source.exists():
-        print("  ⚠️  skills/ directory not found, will use flat structure")
-        return sync_skills_flat(source_dir, target_dir)
-    
-    # First, find all actual skill content
-    all_skills = find_all_skills(source_dir)
-    print(f"  📂 Found {len(all_skills)} total skills in repository")
-    
-    synced_count = 0
-    skill_metadata = []
-    
-    # Walk through the skills/ directory structure
+        return results
+
    for item in skills_source.rglob("*"):
-        # Skip non-directories
        if not item.is_dir():
            continue
-        
-        # Check if this directory (or its symlink target) contains a SKILL.md
+
        skill_md = None
-        skill_source_dir = None
-        
-        # If it's a symlink, resolve it
+        actual_dir = None
+
        if item.is_symlink():
            try:
                resolved = item.resolve()
                if (resolved / "SKILL.md").exists():
                    skill_md = resolved / "SKILL.md"
-                    skill_source_dir = resolved
-            except:
+                    actual_dir = resolved
+            except Exception:
                continue
        elif (item / "SKILL.md").exists():
            skill_md = item / "SKILL.md"
-            skill_source_dir = item
-        
+            actual_dir = item
+
        if skill_md is None:
            continue
-        
-        # Get relative path from skills/ directory - this preserves MS's organization
+
        try:
            relative_path = item.relative_to(skills_source)
        except ValueError:
-            # Shouldn't happen, but handle it
            continue
-        
-        # Create target directory preserving structure
-        target_skill_dir = target_dir / "official" / "microsoft" / relative_path
-        target_skill_dir.mkdir(parents=True, exist_ok=True)
-        
-        # Copy SKILL.md
-        shutil.copy2(skill_md, target_skill_dir / "SKILL.md")
-        
-        # Copy other files from the actual skill directory
-        for file_item in skill_source_dir.iterdir():
-            if file_item.name != "SKILL.md" and file_item.is_file():
-                shutil.copy2(file_item, target_skill_dir / file_item.name)
-        
-        # Collect metadata
-        skill_metadata.append({
-            "path": str(relative_path),
-            "name": item.name,
-            "category": str(relative_path.parent),
-            "source": str(skill_source_dir.relative_to(source_dir))
-        })
-        
-        synced_count += 1
-        print(f"  ✅ Synced: {relative_path}")
-    
-    # Also sync any skills from .github/plugins that aren't symlinked in skills/
-    plugin_skills = find_plugin_skills(source_dir, skill_metadata)
-    if plugin_skills:
-        print(f"\n  📦 Found {len(plugin_skills)} additional plugin skills")
-        for plugin_skill in plugin_skills:
-            target_skill_dir = target_dir / "official" / "microsoft" / "plugins" / plugin_skill['name']
-            target_skill_dir.mkdir(parents=True, exist_ok=True)
-            
-            # Copy SKILL.md
-            shutil.copy2(plugin_skill['source'] / "SKILL.md", target_skill_dir / "SKILL.md")
-            
-            # Copy other files
-            for file_item in plugin_skill['source'].iterdir():
-                if file_item.name != "SKILL.md" and file_item.is_file():
-                    shutil.copy2(file_item, target_skill_dir / file_item.name)
-            
-            skill_metadata.append({
-                "path": f"plugins/{plugin_skill['name']}",
-                "name": plugin_skill['name'],
-                "category": "plugins",
-                "source": str(plugin_skill['source'].relative_to(source_dir))
-            })
-            
-            synced_count += 1
-            print(f"  ✅ Synced: plugins/{plugin_skill['name']}")
-    
-    return synced_count, skill_metadata

-def find_plugin_skills(source_dir: Path, already_synced: list):
-    """Find plugin skills that haven't been synced yet"""
-    synced_names = {s['name'] for s in already_synced}
-    plugin_skills = []
-    
+        results.append({
+            "relative_path": relative_path,
+            "skill_md": skill_md,
+            "source_dir": actual_dir,
+        })
+
+    return results
+
+
+def find_plugin_skills(source_dir: Path, already_synced_names: set):
+    """Find plugin skills in .github/plugins/ that haven't been synced yet."""
+    results = []
    github_plugins = source_dir / ".github" / "plugins"
-    if github_plugins.exists():
-        for skill_file in github_plugins.rglob("SKILL.md"):
-            skill_dir = skill_file.parent
-            skill_name = skill_dir.name
-            
-            if skill_name not in synced_names:
-                plugin_skills.append({
-                    'name': skill_name,
-                    'source': skill_dir
-                })
-    
-    return plugin_skills
+
+    if not github_plugins.exists():
+        return results
+
+    for skill_file in github_plugins.rglob("SKILL.md"):
+        skill_dir = skill_file.parent
+        skill_name = skill_dir.name
+
+        if skill_name not in already_synced_names:
+            results.append({
+                "relative_path": Path("plugins") / skill_name,
+                "skill_md": skill_file,
+                "source_dir": skill_dir,
+            })
+
+    return results
+

 def sync_skills_flat(source_dir: Path, target_dir: Path):
-    """Fallback: sync all skills in a flat structure"""
-    all_skills = find_all_skills(source_dir)
-    
+    """
+    Sync all Microsoft skills into a flat structure under skills/.
+    Uses frontmatter 'name' as directory name, with collision detection.
+    """
+    all_skill_entries = find_skills_in_directory(source_dir)
+    print(f"  📂 Found {len(all_skill_entries)} skills in skills/ directory")
+
    synced_count = 0
    skill_metadata = []
-    
-    for skill_name, skill_dir in all_skills.items():
-        target_skill_dir = target_dir / "official" / "microsoft" / skill_name
+    # name -> original relative_path (for collision logging)
+    used_names: dict[str, str] = {}
+
+    for entry in all_skill_entries:
+        skill_name = extract_skill_name(entry["skill_md"])
+
+        if not skill_name:
+            skill_name = generate_fallback_name(entry["relative_path"])
+            print(
+                f"  ⚠️  No frontmatter name for {entry['relative_path']}, using fallback: {skill_name}")
+
+        # Collision detection
+        if skill_name in used_names:
+            original = used_names[skill_name]
+            print(
+                f"  ⚠️  Name collision '{skill_name}': {entry['relative_path']} vs {original}")
+            # Append the first path component (the language dir) as a suffix to disambiguate
+            lang = entry["relative_path"].parts[0] if entry["relative_path"].parts else "unknown"
+            skill_name = f"{skill_name}-{lang}"
+            print(f"       Resolved to: {skill_name}")
+
+        used_names[skill_name] = str(entry["relative_path"])
+
+        # Create flat target directory
+        target_skill_dir = target_dir / skill_name
        target_skill_dir.mkdir(parents=True, exist_ok=True)
-        
+
        # Copy SKILL.md
-        shutil.copy2(skill_dir / "SKILL.md", target_skill_dir / "SKILL.md")
-        
-        # Copy other files
-        for item in skill_dir.iterdir():
-            if item.name != "SKILL.md" and item.is_file():
-                shutil.copy2(item, target_skill_dir / item.name)
-        
+        shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
+
+        # Copy other files from the skill directory
+        for file_item in entry["source_dir"].iterdir():
+            if file_item.name != "SKILL.md" and file_item.is_file():
+                shutil.copy2(file_item, target_skill_dir / file_item.name)
+
        skill_metadata.append({
-            "path": skill_name,
-            "name": skill_name,
-            "category": "root"
+            "flat_name": skill_name,
+            "original_path": str(entry["relative_path"]),
+            "source": "microsoft/skills",
        })
-        
+
        synced_count += 1
-        print(f"  ✅ Synced: {skill_name}")
-    
+        print(f"  ✅ {entry['relative_path']} → skills/{skill_name}/")
+
+    # Sync plugin skills
+    synced_names = set(used_names.keys())
+    plugin_entries = find_plugin_skills(
+        source_dir, {e["source_dir"].name for e in all_skill_entries})
+
+    if plugin_entries:
+        print(f"\n  📦 Found {len(plugin_entries)} additional plugin skills")
+        for entry in plugin_entries:
+            skill_name = extract_skill_name(entry["skill_md"])
+            if not skill_name:
+                skill_name = entry["source_dir"].name
+
+            if skill_name in synced_names:
+                skill_name = f"{skill_name}-plugin"
+
+            synced_names.add(skill_name)
+
+            target_skill_dir = target_dir / skill_name
+            target_skill_dir.mkdir(parents=True, exist_ok=True)
+
+            shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
+
+            for file_item in entry["source_dir"].iterdir():
+                if file_item.name != "SKILL.md" and file_item.is_file():
+                    shutil.copy2(file_item, target_skill_dir / file_item.name)
+
+            skill_metadata.append({
+                "flat_name": skill_name,
+                "original_path": str(entry["relative_path"]),
+                "source": "microsoft/skills (plugin)",
+            })
+
+            synced_count += 1
+            print(f"  ✅ {entry['relative_path']} → skills/{skill_name}/")
+
    return synced_count, skill_metadata

-def create_attribution_file(target_dir: Path, metadata: list):
-    """Create attribution and metadata file"""
+
+def save_attribution(metadata: list):
+    """Save attribution metadata to docs/."""
+    DOCS_DIR.mkdir(parents=True, exist_ok=True)
    attribution = {
        "source": "microsoft/skills",
        "repository": "https://github.com/microsoft/skills",
        "license": "MIT",
        "synced_skills": len(metadata),
+        "structure": "flat (frontmatter name as directory name)",
        "skills": metadata,
-        "note": "Symlinks resolved and content copied for compatibility. Original directory structure preserved."
    }
-    
-    ms_dir = target_dir / "official" / "microsoft"
-    ms_dir.mkdir(parents=True, exist_ok=True)
-    
-    with open(ms_dir / "ATTRIBUTION.json", "w") as f:
+    with open(DOCS_DIR / "microsoft-skills-attribution.json", "w") as f:
        json.dump(attribution, f, indent=2)

-def copy_documentation(source_dir: Path, target_dir: Path):
-    """Copy LICENSE and README files"""
-    ms_dir = target_dir / "official" / "microsoft"
-    ms_dir.mkdir(parents=True, exist_ok=True)
-    
+
+def copy_license(source_dir: Path):
+    """Copy the Microsoft LICENSE to docs/."""
+    DOCS_DIR.mkdir(parents=True, exist_ok=True)
    if (source_dir / "LICENSE").exists():
-        shutil.copy2(source_dir / "LICENSE", ms_dir / "LICENSE")
-    
-    if (source_dir / "README.md").exists():
-        shutil.copy2(source_dir / "README.md", ms_dir / "README-MICROSOFT.md")
+        shutil.copy2(source_dir / "LICENSE", DOCS_DIR / "LICENSE-MICROSOFT")
+

 def main():
-    """Main sync function"""
-    print("🚀 Microsoft Skills Sync Script v3")
-    print("=" * 50)
-    
+    """Main sync function."""
+    print("🚀 Microsoft Skills Sync Script v4 (Flat Structure)")
+    print("=" * 55)
+
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
-        
+
        try:
-            # Clone repository
            clone_repo(temp_path)
-            
-            # Create target directory
+
            TARGET_DIR.mkdir(parents=True, exist_ok=True)
-            
-            # Sync skills (preserving structure)
-            print("\n🔗 Resolving symlinks and preserving directory structure...")
-            count, metadata = sync_skills_preserve_structure(temp_path, TARGET_DIR)
-            
-            # Copy documentation
-            print("\n📄 Copying documentation...")
-            copy_documentation(temp_path, TARGET_DIR)
-            
-            # Create attribution file
-            print("📝 Creating attribution metadata...")
-            create_attribution_file(TARGET_DIR, metadata)
-            
-            print(f"\n✨ Success! Synced {count} Microsoft skills")
-            print(f"📁 Location: {TARGET_DIR / 'official' / 'microsoft'}")
-            
-            # Show structure summary
-            ms_dir = TARGET_DIR / "official" / "microsoft"
-            categories = set()
+
+            print("\n🔗 Resolving symlinks and flattening into skills/<name>/...")
+            count, metadata = sync_skills_flat(temp_path, TARGET_DIR)
+
+            print("\n📄 Saving attribution...")
+            save_attribution(metadata)
+            copy_license(temp_path)
+
+            print(
+                f"\n✨ Success! Synced {count} Microsoft skills (flat structure)")
+            print(f"📁 Location: {TARGET_DIR}/")
+
+            # Show summary of languages
+            languages = set()
            for skill in metadata:
-                cat = skill.get('category', 'root')
-                if cat != 'root':
-                    categories.add(cat.split('/')[0] if '/' in cat else cat)
-            
+                parts = Path(skill["original_path"]).parts  # separator-agnostic
+                if parts and parts[0] != "plugins":
+                    languages.add(parts[0])
+
            print(f"\n📊 Organization:")
            print(f"  Total skills: {count}")
-            print(f"  Categories: {', '.join(sorted(categories)[:10])}")
-            if len(categories) > 10:
-                print(f"  ... and {len(categories) - 10} more")
-            
+            print(f"  Languages: {', '.join(sorted(languages))}")
+
            print("\n📋 Next steps:")
-            print("1. Review synced skills")
-            print("2. Run: npm run validate")
-            print("3. Update CATALOG.md")
-            print("4. Update docs/SOURCES.md")
-            print("5. Commit changes and create PR")
-            
+            print("1. Delete old skills/official/ directory (if it exists)")
+            print("2. Run: npm run build")
+            print("3. Commit changes and create PR")
+
        except Exception as e:
            print(f"\n❌ Error: {e}")
            import traceback
            traceback.print_exc()
            return 1
-    
+
    return 0

+
 if __name__ == "__main__":
-    exit(main())
+    raise SystemExit(main())
--- a/scripts/tests/inspect_microsoft_repo.py
+++ b/scripts/tests/inspect_microsoft_repo.py
@@ -1,149 +1,98 @@
 #!/usr/bin/env python3
 """
-Debug script to inspect Microsoft Skills repository structure - v2
-Handles all skill locations including plugins
+Inspect Microsoft Skills Repository Structure
+Shows the repository layout, skill locations, and what flat names would be generated.
 """

+import re
 import subprocess
 import tempfile
 from pathlib import Path

 MS_REPO = "https://github.com/microsoft/skills.git"

+
+def extract_skill_name(skill_md_path: Path) -> str | None:
+    """Return the frontmatter 'name' of a SKILL.md, or None if it cannot be read or found."""
+    try:
+        content = skill_md_path.read_text(encoding="utf-8")
+    except Exception:
+        return None
+    # "^" is used without re.MULTILINE, so the '---' fence must open the file.
+    fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
+    if not fm_match:
+        return None
+    # Line-based scan for the first non-empty top-level 'name:' value; no YAML parser is used.
+    for line in fm_match.group(1).splitlines():
+        match = re.match(r"^name:\s*(.+)$", line)
+        if match:
+            value = match.group(1).strip().strip("\"'")  # drop surrounding quotes
+            if value:
+                return value
+    return None
+
+
 def inspect_repo():
-    """Inspect the Microsoft skills repository structure"""
+    """Inspect the Microsoft skills repository structure."""
    print("🔍 Inspecting Microsoft Skills Repository Structure")
    print("=" * 60)
-    
+
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
-        
+
        print("\n1️⃣ Cloning repository...")
        subprocess.run(
            ["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
            check=True,
-            capture_output=True
+            capture_output=True,
        )
-        
-        print("\n2️⃣ Repository structure:")
-        print("\nTop-level directories:")
-        for item in temp_path.iterdir():
-            if item.is_dir():
-                print(f"  📁 {item.name}/")
-        
-        # Check .github/skills
-        github_skills = temp_path / ".github" / "skills"
-        if github_skills.exists():
-            skill_dirs = [d for d in github_skills.iterdir() if d.is_dir()]
-            print(f"\n3️⃣ Found {len(skill_dirs)} directories in .github/skills/:")
-            for skill_dir in skill_dirs[:5]:
-                has_skill_md = (skill_dir / "SKILL.md").exists()
-                print(f"  {'✅' if has_skill_md else '❌'} {skill_dir.name}")
-            if len(skill_dirs) > 5:
-                print(f"  ... and {len(skill_dirs) - 5} more")
-        
-        # Check .github/plugins
-        github_plugins = temp_path / ".github" / "plugins"
-        if github_plugins.exists():
-            plugin_skills = list(github_plugins.rglob("SKILL.md"))
-            print(f"\n🔌 Found {len(plugin_skills)} plugin skills in .github/plugins/:")
-            for skill_file in plugin_skills[:5]:
-                try:
-                    rel_path = skill_file.relative_to(github_plugins)
-                    print(f"  ✅ {rel_path}")
-                except ValueError:
-                    print(f"  ✅ {skill_file.name}")
-            if len(plugin_skills) > 5:
-                print(f"  ... and {len(plugin_skills) - 5} more")
-        
-        # Check skills directory
-        skills_dir = temp_path / "skills"
-        if skills_dir.exists():
-            print(f"\n4️⃣ Checking skills/ directory structure:")
-            
-            # Count items
-            all_items = list(skills_dir.rglob("*"))
-            symlink_dirs = [s for s in all_items if s.is_symlink() and s.is_dir()]
-            symlink_files = [s for s in all_items if s.is_symlink() and not s.is_dir()]
-            regular_dirs = [s for s in all_items if s.is_dir() and not s.is_symlink()]
-            
-            print(f"  Total items: {len(all_items)}")
-            print(f"  Regular directories: {len(regular_dirs)}")
-            print(f"  Symlinked directories: {len(symlink_dirs)}")
-            print(f"  Symlinked files: {len(symlink_files)}")
-            
-            # Show directory structure
-            print(f"\n  Top-level categories in skills/:")
-            for item in skills_dir.iterdir():
-                if item.is_dir():
-                    # Count subdirs
-                    subdirs = [d for d in item.iterdir() if d.is_dir()]
-                    print(f"    📁 {item.name}/ ({len(subdirs)} items)")
-            
-            if symlink_dirs:
-                print(f"\n  Sample symlinked directories:")
-                for symlink in symlink_dirs[:5]:
-                    try:
-                        target = symlink.resolve()
-                        relative = symlink.relative_to(skills_dir)
-                        target_name = target.name if target.exists() else "broken"
-                        print(f"    {relative} → {target_name}")
-                    except:
-                        pass
-        
-        # Check for all SKILL.md files
-        print(f"\n5️⃣ Comprehensive SKILL.md search:")
+
+        # Find all SKILL.md files
        all_skill_mds = list(temp_path.rglob("SKILL.md"))
-        print(f"  Total SKILL.md files found: {len(all_skill_mds)}")
-        
-        # Categorize by location
-        locations = {}
-        for skill_md in all_skill_mds:
+        print(f"\n2️⃣ Total SKILL.md files found: {len(all_skill_mds)}")
+
+        # Show flat name mapping
+        print(f"\n3️⃣ Flat Name Mapping (frontmatter 'name' → directory name):")
+        print("-" * 60)
+
+        names_seen: dict[str, list[str]] = {}
+
+        for skill_md in sorted(all_skill_mds, key=lambda p: str(p)):
            try:
-                if ".github/skills" in str(skill_md):
-                    loc = ".github/skills"
-                elif ".github/plugins" in str(skill_md):
-                    loc = ".github/plugins"
-                elif "/skills/" in str(skill_md):
-                    loc = "skills/ (structure)"
-                else:
-                    loc = "other"
-                
-                locations[loc] = locations.get(loc, 0) + 1
-            except:
-                pass
-        
-        print(f"\n  Distribution by location:")
-        for loc, count in sorted(locations.items()):
-            print(f"    {loc}: {count}")
-        
-        # Show sample skills from each major category
-        print(f"\n6️⃣ Sample skills by category:")
-        
-        if skills_dir.exists():
-            for category in list(skills_dir.iterdir())[:3]:
-                if category.is_dir():
-                    skills_in_cat = [s for s in category.rglob("*") if s.is_dir() and (s.is_symlink() or (s / "SKILL.md").exists())]
-                    print(f"\n  {category.name}/ ({len(skills_in_cat)} skills):")
-                    for skill in skills_in_cat[:3]:
-                        try:
-                            rel = skill.relative_to(skills_dir)
-                            print(f"    - {rel}")
-                        except:
-                            pass
-        
-        print("\n7️⃣ Recommendations:")
-        print("  ✅ Preserve skills/ directory structure (Microsoft's organization)")
-        print("  ✅ Resolve symlinks to actual content in .github/skills/")
-        print("  ✅ Include plugin skills from .github/plugins/")
-        print("  ✅ This gives you the cleanest, most maintainable structure")
-        
+                rel = skill_md.parent.relative_to(temp_path)
+            except ValueError:
+                rel = skill_md.parent
+
+            name = extract_skill_name(skill_md)
+            # Derive the path-based fallback once so the printed name and the
+            # name recorded for collision detection cannot drift apart.
+            effective_name = name if name else f"ms-{'-'.join(rel.parts[1:])}"
+            display_name = name if name else f"(no name → {effective_name})"
+
+            print(f"  {rel} → {display_name}")
+
+            names_seen.setdefault(effective_name, []).append(str(rel))
+
+        # Collision check
+        collisions = {n: paths for n, paths in names_seen.items()
+                      if len(paths) > 1}
+        if collisions:
+            print(f"\n4️⃣ ⚠️  Name Collisions Detected ({len(collisions)}):")
+            for name, paths in collisions.items():
+                print(f"  '{name}':")
+                for p in paths:
+                    print(f"    - {p}")
+        else:
+            print(
+                f"\n4️⃣ ✅ No name collisions — all {len(names_seen)} names are unique!")
+
        print("\n✨ Inspection complete!")

+
 if __name__ == "__main__":
    try:
        inspect_repo()
    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
-        traceback.print_exc()
+        traceback.print_exc()
--- a/scripts/tests/test_comprehensive_coverage.py
+++ b/scripts/tests/test_comprehensive_coverage.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
 """
-Test Script: Verify Microsoft Skills Sync Coverage
-Tests all possible skill locations and structures
+Test Script: Verify Microsoft Skills Sync Coverage and Flat Name Uniqueness
+Ensures all skills are captured and no directory name collisions exist.
 """

+import re
 import subprocess
 import tempfile
 from pathlib import Path
@@ -11,204 +12,177 @@ from collections import defaultdict

 MS_REPO = "https://github.com/microsoft/skills.git"

+
+def extract_skill_name(skill_md_path: Path) -> str | None:
+    """Return the frontmatter 'name' of a SKILL.md, or None if it cannot be read or found."""
+    try:
+        content = skill_md_path.read_text(encoding="utf-8")
+    except Exception:
+        return None
+    # "^" is used without re.MULTILINE, so the '---' fence must open the file.
+    fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
+    if not fm_match:
+        return None
+    # Line-based scan for the first non-empty top-level 'name:' value; no YAML parser is used.
+    for line in fm_match.group(1).splitlines():
+        match = re.match(r"^name:\s*(.+)$", line)
+        if match:
+            value = match.group(1).strip().strip("\"'")  # drop surrounding quotes
+            if value:
+                return value
+    return None
+
+
 def analyze_skill_locations():
    """
    Comprehensive analysis of all skill locations in Microsoft repo.
-    Verifies that v3 script will catch everything.
+    Verifies flat name uniqueness and coverage.
    """
-    print("🔬 Comprehensive Skill Location Analysis")
+    print("🔬 Comprehensive Skill Coverage & Uniqueness Analysis")
    print("=" * 60)
-    
+
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
-        
+
        print("\n1️⃣ Cloning repository...")
        subprocess.run(
            ["git", "clone", "--depth", "1", MS_REPO, str(temp_path)],
            check=True,
-            capture_output=True
+            capture_output=True,
        )
-        
-        # Find ALL SKILL.md files in the entire repo
+
+        # Find ALL SKILL.md files
        all_skill_files = list(temp_path.rglob("SKILL.md"))
        print(f"\n2️⃣ Total SKILL.md files found: {len(all_skill_files)}")
-        
-        # Categorize by location type
+
+        # Categorize by location
        location_types = defaultdict(list)
-        
        for skill_file in all_skill_files:
-            skill_dir = skill_file.parent
-            
-            # Determine location type
-            if ".github/skills" in str(skill_file):
+            path_str = skill_file.as_posix()
+            if ".github/skills" in path_str:
                location_types["github_skills"].append(skill_file)
-            elif ".github/plugins" in str(skill_file):
+            elif ".github/plugins" in path_str:
                location_types["github_plugins"].append(skill_file)
-            elif "/skills/" in str(skill_file):
-                # This is in the skills/ directory structure
-                # Check if it's via symlink or actual file
-                try:
-                    skills_root = temp_path / "skills"
-                    if skills_root in skill_file.parents:
-                        # This skill is somewhere under skills/
-                        # But is it a symlink or actual?
-                        if skill_dir.is_symlink():
-                            location_types["skills_symlinked"].append(skill_file)
-                        else:
-                            # Check if any parent is a symlink
-                            has_symlink_parent = False
-                            for parent in skill_file.parents:
-                                if parent == skills_root:
-                                    break
-                                if parent.is_symlink():
-                                    has_symlink_parent = True
-                                    break
-                            
-                            if has_symlink_parent:
-                                location_types["skills_via_symlink_parent"].append(skill_file)
-                            else:
-                                location_types["skills_direct"].append(skill_file)
-                except:
-                    location_types["unknown"].append(skill_file)
+            elif "/skills/" in path_str:
+                location_types["skills_dir"].append(skill_file)
            else:
                location_types["other"].append(skill_file)
-        
-        # Display results
+
        print("\n3️⃣ Skills by Location Type:")
-        print("-" * 60)
-        
        for loc_type, files in sorted(location_types.items()):
-            print(f"\n  📍 {loc_type}: {len(files)} skills")
-            if len(files) <= 5:
-                for f in files:
-                    try:
-                        rel = f.relative_to(temp_path)
-                        print(f"      - {rel}")
-                    except:
-                        print(f"      - {f.name}")
-            else:
-                for f in files[:3]:
-                    try:
-                        rel = f.relative_to(temp_path)
-                        print(f"      - {rel}")
-                    except:
-                        print(f"      - {f.name}")
-                print(f"      ... and {len(files) - 3} more")
-        
-        # Verify v3 coverage
-        print("\n4️⃣ V3 Script Coverage Analysis:")
+            print(f"  📍 {loc_type}: {len(files)} skills")
+
+        # Flat name uniqueness check
+        print("\n4️⃣ Flat Name Uniqueness Check:")
        print("-" * 60)
-        
-        github_skills_count = len(location_types["github_skills"])
-        github_plugins_count = len(location_types["github_plugins"])
-        skills_symlinked_count = len(location_types["skills_symlinked"])
-        skills_direct_count = len(location_types["skills_direct"])
-        skills_via_symlink_parent_count = len(location_types["skills_via_symlink_parent"])
-        
-        print(f"\n  ✅ .github/skills/: {github_skills_count}")
-        print(f"     └─ Handled by: find_all_skills() function")
-        
-        print(f"\n  ✅ .github/plugins/: {github_plugins_count}")
-        print(f"     └─ Handled by: find_plugin_skills() function")
-        
-        print(f"\n  ✅ skills/ (symlinked dirs): {skills_symlinked_count}")
-        print(f"     └─ Handled by: sync_skills_preserve_structure() lines 76-83")
-        
-        if skills_direct_count > 0:
-            print(f"\n  ✅ skills/ (direct, non-symlink): {skills_direct_count}")
-            print(f"     └─ Handled by: sync_skills_preserve_structure() lines 84-86")
+
+        name_map: dict[str, list[str]] = {}
+        missing_names = []
+
+        for skill_file in all_skill_files:
+            try:
+                rel = skill_file.parent.relative_to(temp_path)
+            except ValueError:
+                rel = skill_file.parent
+
+            name = extract_skill_name(skill_file)
+            if not name:
+                missing_names.append(str(rel))
+                # Generate fallback
+                parts = [p for p in rel.parts if p not in (
+                    ".github", "skills", "plugins")]
+                name = "ms-" + "-".join(parts) if parts else str(rel)
+
+            if name not in name_map:
+                name_map[name] = []
+            name_map[name].append(str(rel))
+
+        # Report results
+        collisions = {n: paths for n, paths in name_map.items()
+                      if len(paths) > 1}
+        unique_names = {n: paths for n,
+                        paths in name_map.items() if len(paths) == 1}
+
+        print(f"\n  ✅ Unique names: {len(unique_names)}")
+
+        if missing_names:
+            print(
+                f"\n  ⚠️  Skills missing frontmatter 'name' ({len(missing_names)}):")
+            for path in missing_names[:5]:
+                print(f"     - {path}")
+            if len(missing_names) > 5:
+                print(f"     ... and {len(missing_names) - 5} more")
+
+        if collisions:
+            print(f"\n  ❌ Name collisions ({len(collisions)}):")
+            for name, paths in collisions.items():
+                print(f"     '{name}':")
+                for p in paths:
+                    print(f"       - {p}")
        else:
-            print(f"\n  ℹ️  skills/ (direct, non-symlink): 0")
-            print(f"     └─ No direct skills found, but v3 would handle them (lines 84-86)")
-        
-        if skills_via_symlink_parent_count > 0:
-            print(f"\n  ⚠️  skills/ (via symlink parent): {skills_via_symlink_parent_count}")
-            print(f"     └─ May need special handling")
-        
+            print(f"\n  ✅ No collisions detected!")
+
+        # Validate all names are valid directory names
+        print("\n5️⃣ Directory Name Validation:")
+        invalid_names = []
+        for name in name_map:
+            if not re.match(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$", name):
+                invalid_names.append(name)
+
+        if invalid_names:
+            print(f"  ❌ Invalid directory names ({len(invalid_names)}):")
+            for name in invalid_names[:5]:
+                print(f"     - '{name}'")
+        else:
+            print(f"  ✅ All {len(name_map)} names are valid directory names!")
+
        # Summary
-        print("\n5️⃣ Summary:")
+        print("\n6️⃣ Summary:")
        print("-" * 60)
-        
-        total_handled = (github_skills_count + github_plugins_count + 
-                        skills_symlinked_count + skills_direct_count)
-        
-        print(f"\n  Total SKILL.md files: {len(all_skill_files)}")
-        print(f"  Handled by v3 script: {total_handled}")
-        
-        if total_handled == len(all_skill_files):
-            print(f"\n  ✅ 100% Coverage - All skills will be synced!")
-        elif total_handled >= len(all_skill_files) * 0.99:
-            print(f"\n  ✅ ~100% Coverage - Script handles all skills!")
-            print(f"     ({len(all_skill_files) - total_handled} skills may be duplicates)")
+        total = len(all_skill_files)
+        # "Unique" counts names claimed by exactly one skill; names shared by
+        # several skills are tallied under "Collisions" instead.
+        print(f"  Total SKILL.md files: {total}")
+        print(f"  Unique flat names: {len(unique_names)}")
+        print(f"  Collisions: {len(collisions)}")
+        print(f"  Missing names: {len(missing_names)}")
+
+        is_pass = len(collisions) == 0 and len(invalid_names) == 0
+        if is_pass:
+            print(f"\n  ✅ ALL CHECKS PASSED")
        else:
-            print(f"\n  ⚠️  Partial Coverage - Missing {len(all_skill_files) - total_handled} skills")
-            print(f"\n  Skills not covered:")
-            for loc_type, files in location_types.items():
-                if loc_type not in ["github_skills", "github_plugins", "skills_symlinked", "skills_direct"]:
-                    print(f"    - {loc_type}: {len(files)}")
-        
-        # Test specific cases
-        print("\n6️⃣ Testing Specific Edge Cases:")
-        print("-" * 60)
-        
-        skills_dir = temp_path / "skills"
-        if skills_dir.exists():
-            # Check for any non-symlink directories with SKILL.md
-            print("\n  Checking for non-symlinked skills in skills/...")
-            non_symlink_skills = []
-            
-            for item in skills_dir.rglob("*"):
-                if item.is_dir() and not item.is_symlink():
-                    if (item / "SKILL.md").exists():
-                        # Check if any parent is a symlink
-                        has_symlink_parent = False
-                        for parent in item.parents:
-                            if parent == skills_dir:
-                                break
-                            if parent.is_symlink():
-                                has_symlink_parent = True
-                                break
-                        
-                        if not has_symlink_parent:
-                            non_symlink_skills.append(item)
-            
-            if non_symlink_skills:
-                print(f"  ✅ Found {len(non_symlink_skills)} non-symlinked skills:")
-                for skill in non_symlink_skills[:5]:
-                    print(f"     - {skill.relative_to(skills_dir)}")
-                print(f"     These WILL be synced by v3 (lines 84-86)")
-            else:
-                print(f"  ℹ️  No non-symlinked skills found in skills/")
-                print(f"     But v3 is ready to handle them if they exist!")
-        
+            print(f"\n  ⚠️  SOME CHECKS NEED ATTENTION")
+
        print("\n✨ Analysis complete!")
-        
+
        return {
-            'total': len(all_skill_files),
-            'handled': total_handled,
-            'breakdown': {k: len(v) for k, v in location_types.items()}
+            "total": total,
+            "unique": len(unique_names),
+            "collisions": len(collisions),
+            "missing_names": len(missing_names),
+            "invalid_names": len(invalid_names),
+            "passed": is_pass,
        }

+
 if __name__ == "__main__":
    try:
        results = analyze_skill_locations()
-        
+
        print("\n" + "=" * 60)
        print("FINAL VERDICT")
        print("=" * 60)
-        
-        coverage_pct = (results['handled'] / results['total'] * 100) if results['total'] > 0 else 0
-        
-        print(f"\nCoverage: {coverage_pct:.1f}%")
-        print(f"Skills handled: {results['handled']}/{results['total']}")
-        
-        if coverage_pct >= 99:
-            print("\n✅ V3 SCRIPT IS COMPREHENSIVE")
-            print("   All skill locations are properly handled!")
+
+        if results["passed"]:
+            print("\n✅ V4 FLAT STRUCTURE IS VALID")
+            print("   All names are unique and valid directory names!")
        else:
-            print("\n⚠️  V3 SCRIPT MAY NEED ENHANCEMENT")
-            print("   Some edge cases might be missed")
-        
+            print("\n⚠️  V4 FLAT STRUCTURE NEEDS FIXES")
+            if results["collisions"] > 0:
+                print(f"   {results['collisions']} name collisions to resolve")
+            if results["invalid_names"] > 0:
+                print(f"   {results['invalid_names']} invalid directory names")
+
    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback