Files
antigravity-skills-reference/scripts/sync_microsoft_skills.py
Ahmed Rehan 35556e0306 feat: add cleanup of stale skills before re-sync
sync_microsoft_skills.py now reads docs/microsoft-skills-attribution.json
to identify previously synced skill directories and removes them before
re-populating. This handles upstream renames, removals, and moves without
leaving orphaned skill directories.
2026-02-12 00:38:24 +05:00

328 lines
10 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Sync Microsoft Skills Repository - v4 (Flat Structure)
Reads each SKILL.md frontmatter 'name' field and uses it as a flat directory
name under skills/ to comply with the repository's indexing conventions.
"""
import re
import shutil
import subprocess
import tempfile
import json
from pathlib import Path
MS_REPO = "https://github.com/microsoft/skills.git"
REPO_ROOT = Path(__file__).parent.parent
TARGET_DIR = REPO_ROOT / "skills"
DOCS_DIR = REPO_ROOT / "docs"
ATTRIBUTION_FILE = DOCS_DIR / "microsoft-skills-attribution.json"
def clone_repo(temp_dir: Path):
"""Clone Microsoft skills repository (shallow)."""
print("🔄 Cloning Microsoft Skills repository...")
subprocess.run(
["git", "clone", "--depth", "1", MS_REPO, str(temp_dir)],
check=True,
)
def cleanup_previous_sync():
"""Remove skill directories from a previous sync using the attribution manifest."""
if not ATTRIBUTION_FILE.exists():
print(" No previous attribution file found — skipping cleanup.")
return 0
try:
with open(ATTRIBUTION_FILE) as f:
attribution = json.load(f)
except (json.JSONDecodeError, OSError) as e:
print(f" ⚠️ Could not read attribution file: {e}")
return 0
previous_skills = attribution.get("skills", [])
removed_count = 0
for skill in previous_skills:
flat_name = skill.get("flat_name", "")
if not flat_name:
continue
skill_dir = TARGET_DIR / flat_name
if skill_dir.exists() and skill_dir.is_dir():
shutil.rmtree(skill_dir)
removed_count += 1
print(
f" 🗑️ Removed {removed_count} previously synced skill directories.")
return removed_count
def extract_skill_name(skill_md_path: Path) -> str | None:
"""Extract the 'name' field from SKILL.md YAML frontmatter."""
try:
content = skill_md_path.read_text(encoding="utf-8")
except Exception:
return None
fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL)
if not fm_match:
return None
for line in fm_match.group(1).splitlines():
match = re.match(r"^name:\s*(.+)$", line)
if match:
value = match.group(1).strip().strip("\"'")
if value:
return value
return None
def generate_fallback_name(relative_path: Path) -> str:
"""
Generate a fallback directory name when frontmatter 'name' is missing.
Converts a path like 'dotnet/compute/botservice' to 'ms-dotnet-compute-botservice'.
"""
parts = [p for p in relative_path.parts if p]
return "ms-" + "-".join(parts)
def find_skills_in_directory(source_dir: Path):
"""
Walk the Microsoft repo's skills/ directory (which uses symlinks)
and resolve each to its actual SKILL.md content.
Returns list of dicts: {relative_path, skill_md_path, source_dir}.
"""
skills_source = source_dir / "skills"
results = []
if not skills_source.exists():
return results
for item in skills_source.rglob("*"):
if not item.is_dir():
continue
skill_md = None
actual_dir = None
if item.is_symlink():
try:
resolved = item.resolve()
if (resolved / "SKILL.md").exists():
skill_md = resolved / "SKILL.md"
actual_dir = resolved
except Exception:
continue
elif (item / "SKILL.md").exists():
skill_md = item / "SKILL.md"
actual_dir = item
if skill_md is None:
continue
try:
relative_path = item.relative_to(skills_source)
except ValueError:
continue
results.append({
"relative_path": relative_path,
"skill_md": skill_md,
"source_dir": actual_dir,
})
return results
def find_plugin_skills(source_dir: Path, already_synced_names: set):
"""Find plugin skills in .github/plugins/ that haven't been synced yet."""
results = []
github_plugins = source_dir / ".github" / "plugins"
if not github_plugins.exists():
return results
for skill_file in github_plugins.rglob("SKILL.md"):
skill_dir = skill_file.parent
skill_name = skill_dir.name
if skill_name not in already_synced_names:
results.append({
"relative_path": Path("plugins") / skill_name,
"skill_md": skill_file,
"source_dir": skill_dir,
})
return results
def sync_skills_flat(source_dir: Path, target_dir: Path):
"""
Sync all Microsoft skills into a flat structure under skills/.
Uses frontmatter 'name' as directory name, with collision detection.
"""
all_skill_entries = find_skills_in_directory(source_dir)
print(f" 📂 Found {len(all_skill_entries)} skills in skills/ directory")
synced_count = 0
skill_metadata = []
# name -> original relative_path (for collision logging)
used_names: dict[str, str] = {}
for entry in all_skill_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = generate_fallback_name(entry["relative_path"])
print(
f" ⚠️ No frontmatter name for {entry['relative_path']}, using fallback: {skill_name}")
# Collision detection
if skill_name in used_names:
original = used_names[skill_name]
print(
f" ⚠️ Name collision '{skill_name}': {entry['relative_path']} vs {original}")
# Append language prefix from path to disambiguate
lang = entry["relative_path"].parts[0] if entry["relative_path"].parts else "unknown"
skill_name = f"{skill_name}-{lang}"
print(f" Resolved to: {skill_name}")
used_names[skill_name] = str(entry["relative_path"])
# Create flat target directory
target_skill_dir = target_dir / skill_name
target_skill_dir.mkdir(parents=True, exist_ok=True)
# Copy SKILL.md
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
# Copy other files from the skill directory
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills",
})
synced_count += 1
print(f"{entry['relative_path']} → skills/{skill_name}/")
# Sync plugin skills
synced_names = set(used_names.keys())
plugin_entries = find_plugin_skills(
source_dir, {e["source_dir"].name for e in all_skill_entries})
if plugin_entries:
print(f"\n 📦 Found {len(plugin_entries)} additional plugin skills")
for entry in plugin_entries:
skill_name = extract_skill_name(entry["skill_md"])
if not skill_name:
skill_name = entry["source_dir"].name
if skill_name in synced_names:
skill_name = f"{skill_name}-plugin"
synced_names.add(skill_name)
target_skill_dir = target_dir / skill_name
target_skill_dir.mkdir(parents=True, exist_ok=True)
shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md")
for file_item in entry["source_dir"].iterdir():
if file_item.name != "SKILL.md" and file_item.is_file():
shutil.copy2(file_item, target_skill_dir / file_item.name)
skill_metadata.append({
"flat_name": skill_name,
"original_path": str(entry["relative_path"]),
"source": "microsoft/skills (plugin)",
})
synced_count += 1
print(f"{entry['relative_path']} → skills/{skill_name}/")
return synced_count, skill_metadata
def save_attribution(metadata: list):
"""Save attribution metadata to docs/."""
DOCS_DIR.mkdir(parents=True, exist_ok=True)
attribution = {
"source": "microsoft/skills",
"repository": "https://github.com/microsoft/skills",
"license": "MIT",
"synced_skills": len(metadata),
"structure": "flat (frontmatter name as directory name)",
"skills": metadata,
}
with open(DOCS_DIR / "microsoft-skills-attribution.json", "w") as f:
json.dump(attribution, f, indent=2)
def copy_license(source_dir: Path):
"""Copy the Microsoft LICENSE to docs/."""
DOCS_DIR.mkdir(parents=True, exist_ok=True)
if (source_dir / "LICENSE").exists():
shutil.copy2(source_dir / "LICENSE", DOCS_DIR / "LICENSE-MICROSOFT")
def main():
"""Main sync function."""
print("🚀 Microsoft Skills Sync Script v4 (Flat Structure)")
print("=" * 55)
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
try:
clone_repo(temp_path)
TARGET_DIR.mkdir(parents=True, exist_ok=True)
print("\n🧹 Cleaning up previous sync...")
cleanup_previous_sync()
print("\n🔗 Resolving symlinks and flattening into skills/<name>/...")
count, metadata = sync_skills_flat(temp_path, TARGET_DIR)
print("\n📄 Saving attribution...")
save_attribution(metadata)
copy_license(temp_path)
print(
f"\n✨ Success! Synced {count} Microsoft skills (flat structure)")
print(f"📁 Location: {TARGET_DIR}/")
# Show summary of languages
languages = set()
for skill in metadata:
parts = skill["original_path"].split("/")
if len(parts) >= 1 and parts[0] != "plugins":
languages.add(parts[0])
print(f"\n📊 Organization:")
print(f" Total skills: {count}")
print(f" Languages: {', '.join(sorted(languages))}")
print("\n📋 Next steps:")
print("1. Run: npm run build")
print("2. Commit changes and create PR")
except Exception as e:
print(f"\n❌ Error: {e}")
import traceback
traceback.print_exc()
return 1
return 0
if __name__ == "__main__":
exit(main())