#!/usr/bin/env python3
"""
Markdown Link Checker for Firefrost Operations Manual

Validates all internal links in .md files to ensure:
1. Referenced files exist
2. Links use correct paths (relative or absolute)
3. No broken cross-references

Usage:
    python3 check-links.py [--fix] [--verbose] [--repo PATH]

Options:
    --fix: Accepted on the command line, but no automatic fixing is
           implemented; the flag currently has no effect
    --verbose: Show detailed output
    --repo: Repository root path to scan (default: current directory)

The process exits with status 1 when broken links are found, 0 otherwise.
"""

import argparse
import os
import re
import sys
from pathlib import Path
from typing import List, Tuple, Set
from urllib.parse import unquote

# Inline markdown link: [text](target). Group 2 is the raw text between the
# parentheses, which may still carry a title, angle brackets or a #fragment.
_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^\)]+)\)')

# URI scheme prefix (http:, https:, mailto:, ftp:, tel:, ...). At least two
# characters are required so a Windows drive letter ("C:") is not a scheme.
_SCHEME_RE = re.compile(r'^[A-Za-z][A-Za-z0-9+.\-]+:')

# Optional quoted link title at the end of a target: (file.md "Title").
_TITLE_RE = re.compile(r'''\s+(?:"[^"]*"|'[^']*')\s*$''')

# Directory names that are never descended into (hidden dirs are skipped too).
_EXCLUDED_DIRS = frozenset({'node_modules', '__pycache__'})


class LinkChecker:
    """Scan a repository's markdown files and collect links to missing files.

    Attributes:
        repo_root: Root directory that is walked for ``.md`` files and that
            absolute (``/``-prefixed) links are resolved against.
        fix: Stored from the constructor; not read anywhere in this class.
        verbose: When true, per-file progress and each broken link are printed
            while checking.
        broken_links: ``(source file relative to repo_root, link text,
            "Line N")`` for every link whose target does not exist.
        checked_files: Repo-relative paths of the files checked so far.
    """

    def __init__(self, repo_root: str, fix: bool = False, verbose: bool = False):
        self.repo_root = Path(repo_root)
        self.fix = fix
        self.verbose = verbose
        self.broken_links: List[Tuple[str, str, str]] = []
        self.checked_files: Set[str] = set()

    def find_markdown_files(self) -> List[Path]:
        """Return every ``.md`` file under ``repo_root``.

        Hidden directories (names starting with ``.``), ``node_modules`` and
        ``__pycache__`` are not descended into.
        """
        md_files = []
        for root, dirs, files in os.walk(self.repo_root):
            # Prune in place so os.walk does not descend into excluded dirs.
            dirs[:] = [
                d for d in dirs
                if not d.startswith('.') and d not in _EXCLUDED_DIRS
            ]
            md_files.extend(
                Path(root) / name for name in files if name.endswith('.md')
            )
        return md_files

    def extract_links(self, content: str) -> List[Tuple[str, int]]:
        """Extract internal markdown links from ``content``.

        Returns:
            ``(link, line_number)`` pairs in document order, where ``link`` is
            the raw text between the parentheses and ``line_number`` is
            1-based. Pure in-page anchors (``#...``) and any link that starts
            with a URI scheme (``http:``, ``https:``, ``mailto:``, ``ftp:``,
            ...) are skipped because they do not point at repository files.
        """
        links = []
        for match in _LINK_RE.finditer(content):
            link = match.group(2)
            if link.startswith('#') or _SCHEME_RE.match(link):
                continue
            # Count newlines before the match without copying the prefix.
            line_num = content.count('\n', 0, match.start()) + 1
            links.append((link, line_num))
        return links

    @staticmethod
    def _file_part(link: str) -> str:
        """Reduce a raw link target to the file path it refers to.

        Removes a trailing quoted title, surrounding ``<...>``, and the
        ``#fragment``, then percent-decodes the remainder.
        """
        target = _TITLE_RE.sub('', link.strip())
        if len(target) >= 2 and target.startswith('<') and target.endswith('>'):
            target = target[1:-1]
        # Split off the fragment before decoding so an encoded '%23' that is
        # part of a file name is not mistaken for a fragment separator.
        target = target.split('#', 1)[0]
        return unquote(target)

    def resolve_link(self, source_file: Path, link: str) -> Path:
        """Resolve a link found in ``source_file`` to a filesystem path.

        Links starting with ``/`` are taken relative to ``repo_root``; all
        others are taken relative to the directory of ``source_file`` and
        normalised with ``Path.resolve()``. Titles, angle brackets, fragments
        and percent-encoding are stripped first, so ``guide.md#setup`` resolves
        to ``guide.md``.
        """
        target = self._file_part(link)
        if target.startswith('/'):
            # Absolute from repo root
            return self.repo_root / target.lstrip('/')
        # Relative to source file
        return (source_file.parent / target).resolve()

    def check_file(self, file_path: Path):
        """Check all links in a single file and record the broken ones.

        Adds the file to ``checked_files`` and appends one entry to
        ``broken_links`` per link whose resolved target does not exist.
        """
        rel_source = str(file_path.relative_to(self.repo_root))
        self.checked_files.add(rel_source)

        # errors='replace' keeps one badly encoded file from aborting the
        # whole scan; undecodable bytes become U+FFFD.
        content = file_path.read_text(encoding='utf-8', errors='replace')

        for link, line_num in self.extract_links(content):
            if self.resolve_link(file_path, link).exists():
                continue
            self.broken_links.append((rel_source, link, f"Line {line_num}"))
            if self.verbose:
                print(f" ❌ {file_path.name}:{line_num} -> {link} (NOT FOUND)")

    def run(self):
        """Run link checker on all markdown files, then print the report.

        Does not return normally: ``report()`` ends the process via
        ``sys.exit``.
        """
        print("🔍 Scanning for markdown files...")
        md_files = self.find_markdown_files()
        print(f"Found {len(md_files)} markdown files\n")

        print("🔗 Checking links...")
        for file_path in md_files:
            if self.verbose:
                print(f"\nChecking: {file_path.relative_to(self.repo_root)}")
            self.check_file(file_path)

        self.report()

    def report(self):
        """Print the summary of broken links and exit.

        Calls ``sys.exit(1)`` when ``broken_links`` is non-empty and
        ``sys.exit(0)`` otherwise.
        """
        print("\n" + "=" * 80)
        print("📊 LINK CHECK REPORT")
        print("=" * 80)
        print(f"\nFiles checked: {len(self.checked_files)}")
        print(f"Broken links found: {len(self.broken_links)}\n")

        if self.broken_links:
            print("❌ BROKEN LINKS:")
            print("-" * 80)
            for source, link, location in self.broken_links:
                print(f"\n📄 {source}")
                print(f" {location}: {link}")
            print("\n" + "=" * 80)
            sys.exit(1)
        else:
            print("✅ All links valid!")
            print("=" * 80)
            sys.exit(0)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Check markdown links in repository")
    parser.add_argument("--fix", action="store_true", help="Attempt to fix broken links")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--repo", default=".", help="Repository root path")

    args = parser.parse_args()

    checker = LinkChecker(
        repo_root=args.repo,
        fix=args.fix,
        verbose=args.verbose
    )

    checker.run()