#!/usr/bin/env python3 """ Sync Microsoft Skills Repository - v4 (Flat Structure) Reads each SKILL.md frontmatter 'name' field and uses it as a flat directory name under skills/ to comply with the repository's indexing conventions. """ import re import shutil import subprocess import tempfile import json from pathlib import Path MS_REPO = "https://github.com/microsoft/skills.git" REPO_ROOT = Path(__file__).parent.parent TARGET_DIR = REPO_ROOT / "skills" DOCS_DIR = REPO_ROOT / "docs" ATTRIBUTION_FILE = DOCS_DIR / "microsoft-skills-attribution.json" def clone_repo(temp_dir: Path): """Clone Microsoft skills repository (shallow).""" print("๐Ÿ”„ Cloning Microsoft Skills repository...") subprocess.run( ["git", "clone", "--depth", "1", MS_REPO, str(temp_dir)], check=True, ) def cleanup_previous_sync(): """Remove skill directories from a previous sync using the attribution manifest.""" if not ATTRIBUTION_FILE.exists(): print(" โ„น๏ธ No previous attribution file found โ€” skipping cleanup.") return 0 try: with open(ATTRIBUTION_FILE) as f: attribution = json.load(f) except (json.JSONDecodeError, OSError) as e: print(f" โš ๏ธ Could not read attribution file: {e}") return 0 previous_skills = attribution.get("skills", []) removed_count = 0 for skill in previous_skills: flat_name = skill.get("flat_name", "") if not flat_name: continue skill_dir = TARGET_DIR / flat_name if skill_dir.exists() and skill_dir.is_dir(): shutil.rmtree(skill_dir) removed_count += 1 print( f" ๐Ÿ—‘๏ธ Removed {removed_count} previously synced skill directories.") return removed_count def extract_skill_name(skill_md_path: Path) -> str | None: """Extract the 'name' field from SKILL.md YAML frontmatter.""" try: content = skill_md_path.read_text(encoding="utf-8") except Exception: return None fm_match = re.search(r"^---\s*\n(.*?)\n---", content, re.DOTALL) if not fm_match: return None for line in fm_match.group(1).splitlines(): match = re.match(r"^name:\s*(.+)$", line) if match: value = match.group(1).strip().strip("\"'") if value: return value return None def generate_fallback_name(relative_path: Path) -> str: """ Generate a fallback directory name when frontmatter 'name' is missing. Converts a path like 'dotnet/compute/botservice' to 'ms-dotnet-compute-botservice'. """ parts = [p for p in relative_path.parts if p] return "ms-" + "-".join(parts) def find_skills_in_directory(source_dir: Path): """ Walk the Microsoft repo's skills/ directory (which uses symlinks) and resolve each to its actual SKILL.md content. Returns list of dicts: {relative_path, skill_md_path, source_dir}. """ skills_source = source_dir / "skills" results = [] if not skills_source.exists(): return results for item in skills_source.rglob("*"): if not item.is_dir(): continue skill_md = None actual_dir = None if item.is_symlink(): try: resolved = item.resolve() if (resolved / "SKILL.md").exists(): skill_md = resolved / "SKILL.md" actual_dir = resolved except Exception: continue elif (item / "SKILL.md").exists(): skill_md = item / "SKILL.md" actual_dir = item if skill_md is None: continue try: relative_path = item.relative_to(skills_source) except ValueError: continue results.append({ "relative_path": relative_path, "skill_md": skill_md, "source_dir": actual_dir, }) return results def find_plugin_skills(source_dir: Path, already_synced_names: set): """Find plugin skills in .github/plugins/ that haven't been synced yet.""" results = [] github_plugins = source_dir / ".github" / "plugins" if not github_plugins.exists(): return results for skill_file in github_plugins.rglob("SKILL.md"): skill_dir = skill_file.parent skill_name = skill_dir.name if skill_name not in already_synced_names: results.append({ "relative_path": Path("plugins") / skill_name, "skill_md": skill_file, "source_dir": skill_dir, }) return results def find_github_skills(source_dir: Path, already_synced_names: set): """Find skills in .github/skills/ not reachable via the skills/ symlink tree.""" results = [] github_skills = source_dir / ".github" / "skills" if not github_skills.exists(): return results for skill_dir in github_skills.iterdir(): if not skill_dir.is_dir() or not (skill_dir / "SKILL.md").exists(): continue if skill_dir.name not in already_synced_names: results.append({ "relative_path": Path(".github/skills") / skill_dir.name, "skill_md": skill_dir / "SKILL.md", "source_dir": skill_dir, }) return results def sync_skills_flat(source_dir: Path, target_dir: Path): """ Sync all Microsoft skills into a flat structure under skills/. Uses frontmatter 'name' as directory name, with collision detection. Protects existing non-Microsoft skills from being overwritten. """ # Load previous attribution to know which dirs are Microsoft-owned previously_synced_names = set() if ATTRIBUTION_FILE.exists(): try: with open(ATTRIBUTION_FILE) as f: prev = json.load(f) previously_synced_names = { s["flat_name"] for s in prev.get("skills", []) if s.get("flat_name") } except (json.JSONDecodeError, OSError): pass all_skill_entries = find_skills_in_directory(source_dir) print(f" ๐Ÿ“‚ Found {len(all_skill_entries)} skills in skills/ directory") synced_count = 0 skill_metadata = [] # name -> original relative_path (for collision logging) used_names: dict[str, str] = {} for entry in all_skill_entries: skill_name = extract_skill_name(entry["skill_md"]) if not skill_name: skill_name = generate_fallback_name(entry["relative_path"]) print( f" โš ๏ธ No frontmatter name for {entry['relative_path']}, using fallback: {skill_name}") # Internal collision detection (two Microsoft skills with same name) if skill_name in used_names: original = used_names[skill_name] print( f" โš ๏ธ Name collision '{skill_name}': {entry['relative_path']} vs {original}") lang = entry["relative_path"].parts[0] if entry["relative_path"].parts else "unknown" skill_name = f"{skill_name}-{lang}" print(f" Resolved to: {skill_name}") # Protect existing non-Microsoft skills from being overwritten target_skill_dir = target_dir / skill_name if target_skill_dir.exists() and skill_name not in previously_synced_names: original_name = skill_name skill_name = f"{skill_name}-ms" print( f" โš ๏ธ '{original_name}' exists as a non-Microsoft skill, using: {skill_name}") used_names[skill_name] = str(entry["relative_path"]) # Create flat target directory target_skill_dir = target_dir / skill_name target_skill_dir.mkdir(parents=True, exist_ok=True) # Copy SKILL.md shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md") # Copy other files from the skill directory for file_item in entry["source_dir"].iterdir(): if file_item.name != "SKILL.md" and file_item.is_file(): shutil.copy2(file_item, target_skill_dir / file_item.name) skill_metadata.append({ "flat_name": skill_name, "original_path": str(entry["relative_path"]), "source": "microsoft/skills", }) synced_count += 1 print(f" โœ… {entry['relative_path']} โ†’ skills/{skill_name}/") # Collect all source directory names already synced (for dedup) synced_names = set(used_names.keys()) already_synced_dir_names = { e["source_dir"].name for e in all_skill_entries} # Sync plugin skills from .github/plugins/ plugin_entries = find_plugin_skills(source_dir, already_synced_dir_names) if plugin_entries: print(f"\n ๐Ÿ“ฆ Found {len(plugin_entries)} additional plugin skills") for entry in plugin_entries: skill_name = extract_skill_name(entry["skill_md"]) if not skill_name: skill_name = entry["source_dir"].name if skill_name in synced_names: skill_name = f"{skill_name}-plugin" # Protect existing non-Microsoft skills target_skill_dir = target_dir / skill_name if target_skill_dir.exists() and skill_name not in previously_synced_names: original_name = skill_name skill_name = f"{skill_name}-ms" target_skill_dir = target_dir / skill_name print( f" โš ๏ธ '{original_name}' exists as a non-Microsoft skill, using: {skill_name}") synced_names.add(skill_name) already_synced_dir_names.add(entry["source_dir"].name) target_skill_dir.mkdir(parents=True, exist_ok=True) shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md") for file_item in entry["source_dir"].iterdir(): if file_item.name != "SKILL.md" and file_item.is_file(): shutil.copy2(file_item, target_skill_dir / file_item.name) skill_metadata.append({ "flat_name": skill_name, "original_path": str(entry["relative_path"]), "source": "microsoft/skills (plugin)", }) synced_count += 1 print(f" โœ… {entry['relative_path']} โ†’ skills/{skill_name}/") # Sync skills in .github/skills/ not reachable via the skills/ symlink tree github_skill_entries = find_github_skills( source_dir, already_synced_dir_names) if github_skill_entries: print( f"\n ๏ฟฝ Found {len(github_skill_entries)} skills in .github/skills/ not linked from skills/") for entry in github_skill_entries: skill_name = extract_skill_name(entry["skill_md"]) if not skill_name: skill_name = entry["source_dir"].name if skill_name in synced_names: skill_name = f"{skill_name}-github" # Protect existing non-Microsoft skills target_skill_dir = target_dir / skill_name if target_skill_dir.exists() and skill_name not in previously_synced_names: original_name = skill_name skill_name = f"{skill_name}-ms" target_skill_dir = target_dir / skill_name print( f" โš ๏ธ '{original_name}' exists as a non-Microsoft skill, using: {skill_name}") synced_names.add(skill_name) target_skill_dir.mkdir(parents=True, exist_ok=True) shutil.copy2(entry["skill_md"], target_skill_dir / "SKILL.md") for file_item in entry["source_dir"].iterdir(): if file_item.name != "SKILL.md" and file_item.is_file(): shutil.copy2(file_item, target_skill_dir / file_item.name) skill_metadata.append({ "flat_name": skill_name, "original_path": str(entry["relative_path"]), "source": "microsoft/skills (.github/skills)", }) synced_count += 1 print(f" โœ… {entry['relative_path']} โ†’ skills/{skill_name}/") return synced_count, skill_metadata def save_attribution(metadata: list): """Save attribution metadata to docs/.""" DOCS_DIR.mkdir(parents=True, exist_ok=True) attribution = { "source": "microsoft/skills", "repository": "https://github.com/microsoft/skills", "license": "MIT", "synced_skills": len(metadata), "structure": "flat (frontmatter name as directory name)", "skills": metadata, } with open(DOCS_DIR / "microsoft-skills-attribution.json", "w") as f: json.dump(attribution, f, indent=2) def copy_license(source_dir: Path): """Copy the Microsoft LICENSE to docs/.""" DOCS_DIR.mkdir(parents=True, exist_ok=True) if (source_dir / "LICENSE").exists(): shutil.copy2(source_dir / "LICENSE", DOCS_DIR / "LICENSE-MICROSOFT") def main(): """Main sync function.""" print("๐Ÿš€ Microsoft Skills Sync Script v4 (Flat Structure)") print("=" * 55) with tempfile.TemporaryDirectory() as temp_dir: temp_path = Path(temp_dir) try: clone_repo(temp_path) TARGET_DIR.mkdir(parents=True, exist_ok=True) print("\n๐Ÿงน Cleaning up previous sync...") cleanup_previous_sync() print("\n๐Ÿ”— Resolving symlinks and flattening into skills//...") count, metadata = sync_skills_flat(temp_path, TARGET_DIR) print("\n๐Ÿ“„ Saving attribution...") save_attribution(metadata) copy_license(temp_path) print( f"\nโœจ Success! Synced {count} Microsoft skills (flat structure)") print(f"๐Ÿ“ Location: {TARGET_DIR}/") # Show summary of languages languages = set() for skill in metadata: parts = skill["original_path"].split("/") if len(parts) >= 1 and parts[0] != "plugins": languages.add(parts[0]) print(f"\n๐Ÿ“Š Organization:") print(f" Total skills: {count}") print(f" Languages: {', '.join(sorted(languages))}") print("\n๐Ÿ“‹ Next steps:") print("1. Run: npm run build") print("2. Commit changes and create PR") except Exception as e: print(f"\nโŒ Error: {e}") import traceback traceback.print_exc() return 1 return 0 if __name__ == "__main__": exit(main())