Files
antigravity-skills-reference/scripts/sync_microsoft_skills.py
Ahmed Rehan e7ae616385 refactor: flatten Microsoft skills from nested to flat directory structure
Rewrote sync_microsoft_skills.py (v4) to use each SKILL.md's frontmatter
'name' field as the flat directory name under skills/, replacing the nested
skills/official/microsoft/<lang>/<category>/<service>/ hierarchy.

This fixes CI failures caused by the indexing, validation, and catalog
scripts expecting skills/<id>/SKILL.md (depth 1).

Changes:
- Rewrite scripts/sync_microsoft_skills.py for flat output with collision detection
- Update scripts/tests/inspect_microsoft_repo.py for flat name mapping
- Update scripts/tests/test_comprehensive_coverage.py for name uniqueness checks
- Delete skills/official/ nested directory
- Add 129 Microsoft skills as flat directories (e.g. skills/azure-mgmt-botservice-dotnet/)
- Move attribution files to docs/ (LICENSE-MICROSOFT, microsoft-skills-attribution.json)
- Rebuild skills_index.json, CATALOG.md, README.md (845 total skills)
2026-02-12 00:17:38 +05:00

294 lines
9.3 KiB
Python

#!/usr/bin/env python3
"""
Sync Microsoft Skills Repository - v4 (Flat Structure)
Reads each SKILL.md frontmatter 'name' field and uses it as a flat directory
name under skills/ to comply with the repository's indexing conventions.
"""
import re
import shutil
import subprocess
import tempfile
import json
from pathlib import Path
MS_REPO = "https://github.com/microsoft/skills.git"
REPO_ROOT = Path(__file__).parent.parent
TARGET_DIR = REPO_ROOT / "skills"
DOCS_DIR = REPO_ROOT / "docs"
def clone_repo(temp_dir: Path):
"""Clone Microsoft skills repository (shallow)."""
print("🔄 Cloning Microsoft Skills repository...")
subprocess.run(
["git", "clone", "--depth", "1", MS_REPO, str(temp_dir)],
check=True,
)
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
content = skill_md_path.read_text(encoding="utf-8")
except Exception:
return None
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
if not fm_match:
return None
for line in fm_match.group(1).splitlines():
match = re.match(r"^name:\s*(.+)$", line)
if match:
value = match.group(1).strip().strip("\"'")
if value:
return value
return None
def generate_fallback_name(relative_path: Path) -> str:
"""
Generate a fallback directory name when frontmatter 'name' is missing.
Converts a path like 'dotnet/compute/botservice' to 'ms-dotnet-compute-botservice'.
"""
parts = [p for p in relative_path.parts if p]
return "ms-" + "-".join(parts)
def find_skills_in_directory(source_dir: Path):
"""
Walk the Microsoft repo's skills/ directory (which uses symlinks)
and resolve each to its actual SKILL.md content.
Returns list of dicts: {relative_path, skill_md_path, source_dir}.
"""
skills_source = source_dir / "skills"
results = []
if not skills_source.exists():
return results
for item in skills_source.rglob("*"):
if not item.is_dir():
continue
skill_md = None
actual_dir = None
if item.is_symlink():
try:
resolved = item.resolve()
if (resolved / "SKILL.md").exists():
skill_md = resolved / "SKILL.md"
actual_dir = resolved
except Exception:
continue
elif (item / "SKILL.md").exists():
skill_md = item / "SKILL.md"
actual_dir = item
if skill_md is None:
continue
try:
relative_path = item.relative_to(skills_source)
except ValueError:
continue
results.append({
"relative_path": relative_path,
"skill_md": skill_md,
"source_dir": actual_dir,
})
return results
def find_plugin_skills(source_dir: Path, already_synced_names: set):
"""Find plugin skills in .github/plugins/ that haven't been synced yet."""
results = []
github_plugins = source_dir / ".github" / "plugins"
if not github_plugins.exists():
return results
for skill_file in github_plugins.rglob("SKILL.md"):
skill_dir = skill_file.parent
skill_name = skill_dir.name
if skill_name not in already_synced_names:
results.append({
"relative_path": Path("plugins") / skill_name,
"skill_md": skill_file,
"source_dir": skill_dir,
})
return results
def sync_skills_flat(source_dir: Path, target_dir: Path):
"""
Sync all Microsoft skills into a flat structure under skills/.
Uses frontmatter 'name' as directory name, with collision detection.
"""
all_skill_entries = find_skills_in_directory(source_dir)
print(f" 📂 Found {len(all_skill_entries)} skills in skills/ directory")
synced_count = 0
skill_metadata = []
# name -> original relative_path (for collision logging)
used_names: dict[str, str] = {}
for entry in all_skill_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = generate_fallback_name(entry["relative_path"])
print(
f" ⚠️ No frontmatter name for {entry['relative_path']}, using fallback: {skill_name}")
# Collision detection
if skill_name in used_names:
original = used_names[skill_name]
print(
f" ⚠️ Name collision '{skill_name}': {entry['relative_path']} vs {original}")
# Append language prefix from path to disambiguate
lang = entry["relative_path"].parts[0] if entry["relative_path"].parts else "unknown"
skill_name = f"{skill_name}-{lang}"
print(f" Resolved to: {skill_name}")
used_names[skill_name] = str(entry["relative_path"])
# Create flat target directory
target_skill_dir = target_dir / skill_name
target_skill_dir.mkdir(parents=True, exist_ok=True)
# Copy SKILL.md
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
# Copy other files from the skill directory
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills",
})
synced_count += 1
print(f"{entry['relative_path']} → skills/{skill_name}/")
# Sync plugin skills
synced_names = set(used_names.keys())
plugin_entries = find_plugin_skills(
source_dir, {e["source_dir"].name for e in all_skill_entries})
if plugin_entries:
print(f"\n 📦 Found {len(plugin_entries)} additional plugin skills")
for entry in plugin_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = entry["source_dir"].name
if skill_name in synced_names:
skill_name = f"{skill_name}-plugin"
synced_names.add(skill_name)
target_skill_dir = target_dir / skill_name
target_skill_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills (plugin)",
})
synced_count += 1
print(f"{entry['relative_path']} → skills/{skill_name}/")
return synced_count, skill_metadata
def save_attribution(metadata: list):
"""Save attribution metadata to docs/."""
DOCS_DIR.mkdir(parents=True, exist_ok=True)
attribution = {
"source": "microsoft/skills",
"repository": "https://github.com/microsoft/skills",
"license": "MIT",
"synced_skills": len(metadata),
"structure": "flat (frontmatter name as directory name)",
"skills": metadata,
}
with open(DOCS_DIR / "microsoft-skills-attribution.json", "w") as f:
json.dump(attribution, f, indent=2)
def copy_license(source_dir: Path):
"""Copy the Microsoft LICENSE to docs/."""
DOCS_DIR.mkdir(parents=True, exist_ok=True)
if (source_dir / "LICENSE").exists():
shutil.copy2(source_dir / "LICENSE", DOCS_DIR / "LICENSE-MICROSOFT")
def main():
"""Main sync function."""
print("🚀 Microsoft Skills Sync Script v4 (Flat Structure)")
print("=" * 55)
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
try:
clone_repo(temp_path)
TARGET_DIR.mkdir(parents=True, exist_ok=True)
print("\n🔗 Resolving symlinks and flattening into skills/<name>/...")
count, metadata = sync_skills_flat(temp_path, TARGET_DIR)
print("\n📄 Saving attribution...")
save_attribution(metadata)
copy_license(temp_path)
print(
f"\n✨ Success! Synced {count} Microsoft skills (flat structure)")
print(f"📁 Location: {TARGET_DIR}/")
# Show summary of languages
languages = set()
for skill in metadata:
parts = skill["original_path"].split("/")
if len(parts) >= 1 and parts[0] != "plugins":
languages.add(parts[0])
print(f"\n📊 Organization:")
print(f" Total skills: {count}")
print(f" Languages: {', '.join(sorted(languages))}")
print("\n📋 Next steps:")
print("1. Delete old skills/official/ directory (if it exists)")
print("2. Run: npm run build")
print("3. Commit changes and create PR")
except Exception as e:
print(f"\n❌ Error: {e}")
import traceback
traceback.print_exc()
return 1
return 0
if __name__ == "__main__":
exit(main())