Files
antigravity-skills-reference/scripts/sync_microsoft_skills.py
Ahmed Rehan 44e51f0ea9 feat: sync all 140 Microsoft skills with collision protection
- Add find_github_skills() to discover skills in .github/skills/ not
  reachable via the skills/ symlink tree (picks up 11 missing skills)
- Add collision protection: if a target directory exists and was not
  from a previous Microsoft sync, append -ms suffix instead of overwriting
- Microsoft mcp-builder → mcp-builder-ms (community version preserved)
- Microsoft skill-creator → skill-creator-ms (community version preserved)
- Total skills: 856 (was 845, +11 newly discovered)
2026-02-12 15:34:42 +05:00

425 lines
14 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Sync Microsoft Skills Repository - v4 (Flat Structure)
Reads each SKILL.md frontmatter 'name' field and uses it as a flat directory
name under skills/ to comply with the repository's indexing conventions.
"""
import re
import shutil
import subprocess
import tempfile
import json
from pathlib import Path
MS_REPO = "https://github.com/microsoft/skills.git"
REPO_ROOT = Path(__file__).parent.parent
TARGET_DIR = REPO_ROOT / "skills"
DOCS_DIR = REPO_ROOT / "docs"
ATTRIBUTION_FILE = DOCS_DIR / "microsoft-skills-attribution.json"
def clone_repo(temp_dir: Path):
"""Clone Microsoft skills repository (shallow)."""
print("🔄 Cloning Microsoft Skills repository...")
subprocess.run(
["git", "clone", "--depth", "1", MS_REPO, str(temp_dir)],
check=True,
)
def cleanup_previous_sync():
"""Remove skill directories from a previous sync using the attribution manifest."""
if not ATTRIBUTION_FILE.exists():
print(" No previous attribution file found — skipping cleanup.")
return 0
try:
with open(ATTRIBUTION_FILE) as f:
attribution = json.load(f)
except (json.JSONDecodeError, OSError) as e:
print(f" ⚠️ Could not read attribution file: {e}")
return 0
previous_skills = attribution.get("skills", [])
removed_count = 0
for skill in previous_skills:
flat_name = skill.get("flat_name", "")
if not flat_name:
continue
skill_dir = TARGET_DIR / flat_name
if skill_dir.exists() and skill_dir.is_dir():
shutil.rmtree(skill_dir)
removed_count += 1
print(
f" 🗑️ Removed {removed_count} previously synced skill directories.")
return removed_count
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
content = skill_md_path.read_text(encoding="utf-8")
except Exception:
return None
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
if not fm_match:
return None
for line in fm_match.group(1).splitlines():
match = re.match(r"^name:\s*(.+)$", line)
if match:
value = match.group(1).strip().strip("\"'")
if value:
return value
return None
def generate_fallback_name(relative_path: Path) -> str:
"""
Generate a fallback directory name when frontmatter 'name' is missing.
Converts a path like 'dotnet/compute/botservice' to 'ms-dotnet-compute-botservice'.
"""
parts = [p for p in relative_path.parts if p]
return "ms-" + "-".join(parts)
def find_skills_in_directory(source_dir: Path):
"""
Walk the Microsoft repo's skills/ directory (which uses symlinks)
and resolve each to its actual SKILL.md content.
Returns list of dicts: {relative_path, skill_md_path, source_dir}.
"""
skills_source = source_dir / "skills"
results = []
if not skills_source.exists():
return results
for item in skills_source.rglob("*"):
if not item.is_dir():
continue
skill_md = None
actual_dir = None
if item.is_symlink():
try:
resolved = item.resolve()
if (resolved / "SKILL.md").exists():
skill_md = resolved / "SKILL.md"
actual_dir = resolved
except Exception:
continue
elif (item / "SKILL.md").exists():
skill_md = item / "SKILL.md"
actual_dir = item
if skill_md is None:
continue
try:
relative_path = item.relative_to(skills_source)
except ValueError:
continue
results.append({
"relative_path": relative_path,
"skill_md": skill_md,
"source_dir": actual_dir,
})
return results
def find_plugin_skills(source_dir: Path, already_synced_names: set):
"""Find plugin skills in .github/plugins/ that haven't been synced yet."""
results = []
github_plugins = source_dir / ".github" / "plugins"
if not github_plugins.exists():
return results
for skill_file in github_plugins.rglob("SKILL.md"):
skill_dir = skill_file.parent
skill_name = skill_dir.name
if skill_name not in already_synced_names:
results.append({
"relative_path": Path("plugins") / skill_name,
"skill_md": skill_file,
"source_dir": skill_dir,
})
return results
def find_github_skills(source_dir: Path, already_synced_names: set):
"""Find skills in .github/skills/ not reachable via the skills/ symlink tree."""
results = []
github_skills = source_dir / ".github" / "skills"
if not github_skills.exists():
return results
for skill_dir in github_skills.iterdir():
if not skill_dir.is_dir() or not (skill_dir / "SKILL.md").exists():
continue
if skill_dir.name not in already_synced_names:
results.append({
"relative_path": Path(".github/skills") / skill_dir.name,
"skill_md": skill_dir / "SKILL.md",
"source_dir": skill_dir,
})
return results
def sync_skills_flat(source_dir: Path, target_dir: Path):
"""
Sync all Microsoft skills into a flat structure under skills/.
Uses frontmatter 'name' as directory name, with collision detection.
Protects existing non-Microsoft skills from being overwritten.
"""
# Load previous attribution to know which dirs are Microsoft-owned
previously_synced_names = set()
if ATTRIBUTION_FILE.exists():
try:
with open(ATTRIBUTION_FILE) as f:
prev = json.load(f)
previously_synced_names = {
s["flat_name"] for s in prev.get("skills", []) if s.get("flat_name")
}
except (json.JSONDecodeError, OSError):
pass
all_skill_entries = find_skills_in_directory(source_dir)
print(f" 📂 Found {len(all_skill_entries)} skills in skills/ directory")
synced_count = 0
skill_metadata = []
# name -> original relative_path (for collision logging)
used_names: dict[str, str] = {}
for entry in all_skill_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = generate_fallback_name(entry["relative_path"])
print(
f" ⚠️ No frontmatter name for {entry['relative_path']}, using fallback: {skill_name}")
# Internal collision detection (two Microsoft skills with same name)
if skill_name in used_names:
original = used_names[skill_name]
print(
f" ⚠️ Name collision '{skill_name}': {entry['relative_path']} vs {original}")
lang = entry["relative_path"].parts[0] if entry["relative_path"].parts else "unknown"
skill_name = f"{skill_name}-{lang}"
print(f" Resolved to: {skill_name}")
# Protect existing non-Microsoft skills from being overwritten
target_skill_dir = target_dir / skill_name
if target_skill_dir.exists() and skill_name not in previously_synced_names:
original_name = skill_name
skill_name = f"{skill_name}-ms"
print(
f" ⚠️ '{original_name}' exists as a non-Microsoft skill, using: {skill_name}")
used_names[skill_name] = str(entry["relative_path"])
# Create flat target directory
target_skill_dir = target_dir / skill_name
target_skill_dir.mkdir(parents=True, exist_ok=True)
# Copy SKILL.md
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
# Copy other files from the skill directory
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills",
})
synced_count += 1
print(f"{entry['relative_path']} → skills/{skill_name}/")
# Collect all source directory names already synced (for dedup)
synced_names = set(used_names.keys())
already_synced_dir_names = {
e["source_dir"].name for e in all_skill_entries}
# Sync plugin skills from .github/plugins/
plugin_entries = find_plugin_skills(source_dir, already_synced_dir_names)
if plugin_entries:
print(f"\n 📦 Found {len(plugin_entries)} additional plugin skills")
for entry in plugin_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = entry["source_dir"].name
if skill_name in synced_names:
skill_name = f"{skill_name}-plugin"
# Protect existing non-Microsoft skills
target_skill_dir = target_dir / skill_name
if target_skill_dir.exists() and skill_name not in previously_synced_names:
original_name = skill_name
skill_name = f"{skill_name}-ms"
target_skill_dir = target_dir / skill_name
print(
f" ⚠️ '{original_name}' exists as a non-Microsoft skill, using: {skill_name}")
synced_names.add(skill_name)
already_synced_dir_names.add(entry["source_dir"].name)
target_skill_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills (plugin)",
})
synced_count += 1
print(f"{entry['relative_path']} → skills/{skill_name}/")
# Sync skills in .github/skills/ not reachable via the skills/ symlink tree
github_skill_entries = find_github_skills(
source_dir, already_synced_dir_names)
if github_skill_entries:
print(
f"\n <20> Found {len(github_skill_entries)} skills in .github/skills/ not linked from skills/")
for entry in github_skill_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = entry["source_dir"].name
if skill_name in synced_names:
skill_name = f"{skill_name}-github"
# Protect existing non-Microsoft skills
target_skill_dir = target_dir / skill_name
if target_skill_dir.exists() and skill_name not in previously_synced_names:
original_name = skill_name
skill_name = f"{skill_name}-ms"
target_skill_dir = target_dir / skill_name
print(
f" ⚠️ '{original_name}' exists as a non-Microsoft skill, using: {skill_name}")
synced_names.add(skill_name)
target_skill_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills (.github/skills)",
})
synced_count += 1
print(f"{entry['relative_path']} → skills/{skill_name}/")
return synced_count, skill_metadata
def save_attribution(metadata: list):
"""Save attribution metadata to docs/."""
DOCS_DIR.mkdir(parents=True, exist_ok=True)
attribution = {
"source": "microsoft/skills",
"repository": "https://github.com/microsoft/skills",
"license": "MIT",
"synced_skills": len(metadata),
"structure": "flat (frontmatter name as directory name)",
"skills": metadata,
}
with open(DOCS_DIR / "microsoft-skills-attribution.json", "w") as f:
json.dump(attribution, f, indent=2)
def copy_license(source_dir: Path):
"""Copy the Microsoft LICENSE to docs/."""
DOCS_DIR.mkdir(parents=True, exist_ok=True)
if (source_dir / "LICENSE").exists():
shutil.copy2(source_dir / "LICENSE", DOCS_DIR / "LICENSE-MICROSOFT")
def main():
"""Main sync function."""
print("🚀 Microsoft Skills Sync Script v4 (Flat Structure)")
print("=" * 55)
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
try:
clone_repo(temp_path)
TARGET_DIR.mkdir(parents=True, exist_ok=True)
print("\n🧹 Cleaning up previous sync...")
cleanup_previous_sync()
print("\n🔗 Resolving symlinks and flattening into skills/<name>/...")
count, metadata = sync_skills_flat(temp_path, TARGET_DIR)
print("\n📄 Saving attribution...")
save_attribution(metadata)
copy_license(temp_path)
print(
f"\n✨ Success! Synced {count} Microsoft skills (flat structure)")
print(f"📁 Location: {TARGET_DIR}/")
# Show summary of languages
languages = set()
for skill in metadata:
parts = skill["original_path"].split("/")
if len(parts) >= 1 and parts[0] != "plugins":
languages.add(parts[0])
print(f"\n📊 Organization:")
print(f" Total skills: {count}")
print(f" Languages: {', '.join(sorted(languages))}")
print("\n📋 Next steps:")
print("1. Run: npm run build")
print("2. Commit changes and create PR")
except Exception as e:
print(f"\n❌ Error: {e}")
import traceback
traceback.print_exc()
return 1
return 0
if __name__ == "__main__":
exit(main())