Automated markdown link checker: - Validates all internal links - Checks relative/absolute paths - Detects broken cross-references Per Fresh Claude Review Priority #2: Add validation tooling. Usage: python3 automation/check-links.py [--verbose] Date: 2026-02-16
139 lines
4.7 KiB
Python
139 lines
4.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Markdown Link Checker for Firefrost Operations Manual
|
|
|
|
Validates all internal links in .md files to ensure:
|
|
1. Referenced files exist
|
|
2. Links use correct paths (relative or absolute)
|
|
3. No broken cross-references
|
|
|
|
Usage:
|
|
python3 check-links.py [--fix] [--verbose] [--repo PATH]
|
|
|
|
Options:
|
|
--fix: Reserved for automatic fixing of common issues (accepted, but currently has no effect)
|
|
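--verbose, -v: Show detailed output (each file checked and each broken link as it is found)
--repo PATH: Repository root path to scan (default: current directory)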
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import List, Tuple, Set
|
|
|
|
class LinkChecker:
    """Scan a repository's markdown files and report links to missing files.

    Every ``[text](target)`` link whose target is not an external URL or a
    pure in-page anchor is resolved to a filesystem path and checked for
    existence. Broken links are collected in ``broken_links`` as
    ``(source file relative to repo root, raw link text, "Line N")`` tuples.

    Args:
        repo_root: Directory to scan; also the base for links starting with ``/``.
        fix: Stored on the instance but not read by any method of this class.
        verbose: When true, print each file as it is checked and each broken
            link as soon as it is found.
    """

    # [text](target): group 2 is the raw text between the parentheses.
    _LINK_RE = re.compile(r'\[([^\]]+)\]\(([^\)]+)\)')
    # `path "Title"` / `path 'Title'`: only a *quoted* trailing title is
    # stripped, so a target that merely contains spaces is left untouched.
    _TITLE_RE = re.compile(r'''^(\S+)\s+(?:"[^"]*"|'[^']*')$''')
    # URI scheme prefix (http:, mailto:, ftp:, tel:, ...). At least two
    # characters are required so a drive letter such as "C:" is not a scheme.
    _SCHEME_RE = re.compile(r'^[A-Za-z][A-Za-z0-9+.\-]+:')
    # Directory names never descended into (hidden directories are skipped too).
    _EXCLUDED_DIRS = ('node_modules', '__pycache__')

    def __init__(self, repo_root: str, fix: bool = False, verbose: bool = False):
        self.repo_root = Path(repo_root)
        self.fix = fix
        self.verbose = verbose
        # (source file, raw link, "Line N") for every link whose target is missing.
        self.broken_links: List[Tuple[str, str, str]] = []
        # Repo-relative paths of the files passed to check_file().
        self.checked_files: Set[str] = set()

    def find_markdown_files(self) -> List[Path]:
        """Return every ``.md`` file under the repo root.

        Hidden directories, ``node_modules`` and ``__pycache__`` are pruned.
        Directories and files are visited in sorted order so the report is
        reproducible from run to run.
        """
        md_files: List[Path] = []
        for root, dirs, files in os.walk(self.repo_root):
            # Assigning to dirs[:] prunes the walk in place.
            dirs[:] = sorted(
                d for d in dirs
                if not d.startswith('.') and d not in self._EXCLUDED_DIRS
            )
            md_files.extend(
                Path(root) / name for name in sorted(files) if name.endswith('.md')
            )
        return md_files

    @classmethod
    def _clean_target(cls, link: str) -> str:
        """Return the link destination without ``<...>`` wrapping or a quoted title."""
        target = link.strip()
        if target.startswith('<'):
            closing = target.find('>')
            if closing != -1:
                return target[1:closing]
        titled = cls._TITLE_RE.match(target)
        return titled.group(1) if titled else target

    @classmethod
    def _is_external(cls, link: str) -> bool:
        """Return True for targets that do not name a file in the repository."""
        target = cls._clean_target(link)
        # '#...' is an in-page anchor; '//host/...' is a protocol-relative URL.
        return target.startswith(('#', '//')) or bool(cls._SCHEME_RE.match(target))

    def extract_links(self, content: str) -> List[Tuple[str, int]]:
        """Extract internal markdown links from *content*.

        Returns:
            ``(raw link text, 1-based line number)`` for every
            ``[text](link)`` occurrence that is not an external URL or a
            pure anchor. The link text is returned exactly as written.
        """
        links: List[Tuple[str, int]] = []
        line_num = 1
        scanned_to = 0
        for match in self._LINK_RE.finditer(content):
            # Count newlines only since the previous match instead of
            # re-slicing the whole prefix for every link.
            line_num += content.count('\n', scanned_to, match.start())
            scanned_to = match.start()
            link = match.group(2)
            if not self._is_external(link):
                links.append((link, line_num))
        return links

    def resolve_link(self, source_file: Path, link: str) -> Path:
        """Resolve *link*, as written in *source_file*, to a filesystem path.

        A quoted title, ``<...>`` wrapping, ``#fragment`` and ``?query`` are
        removed first: they address content inside the target, not the file.
        Targets starting with ``/`` are taken relative to the repo root; all
        others are relative to the directory containing *source_file*.
        """
        target = self._clean_target(link)
        for separator in ('#', '?'):
            target = target.split(separator, 1)[0]
        if not target:
            # Nothing but a fragment/query remains: the link points at the
            # source document itself.
            return source_file
        if target.startswith('/'):
            return self.repo_root / target.lstrip('/')
        return (source_file.parent / target).resolve()

    def check_file(self, file_path: Path):
        """Check all links in a single file and record the broken ones."""
        rel_path = str(file_path.relative_to(self.repo_root))
        self.checked_files.add(rel_path)

        # errors='replace' keeps one badly encoded file from aborting the
        # whole run with UnicodeDecodeError.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()

        for link, line_num in self.extract_links(content):
            if self.resolve_link(file_path, link).exists():
                continue
            self.broken_links.append((rel_path, link, f"Line {line_num}"))
            if self.verbose:
                print(f" ❌ {file_path.name}:{line_num} -> {link} (NOT FOUND)")

    def run(self):
        """Check every markdown file in the repository, then print the report.

        Does not return normally: report() ends the process via sys.exit().
        """
        print("🔍 Scanning for markdown files...")
        md_files = self.find_markdown_files()
        print(f"Found {len(md_files)} markdown files\n")

        print("🔗 Checking links...")
        for file_path in md_files:
            if self.verbose:
                print(f"\nChecking: {file_path.relative_to(self.repo_root)}")
            self.check_file(file_path)

        self.report()

    def report(self):
        """Print the summary and exit with status 1 if any link is broken, else 0."""
        print("\n" + "="*80)
        print("📊 LINK CHECK REPORT")
        print("="*80)

        print(f"\nFiles checked: {len(self.checked_files)}")
        print(f"Broken links found: {len(self.broken_links)}\n")

        if self.broken_links:
            print("❌ BROKEN LINKS:")
            print("-" * 80)
            for source, link, location in self.broken_links:
                print(f"\n📄 {source}")
                print(f" {location}: {link}")
            print("\n" + "="*80)
            sys.exit(1)
        else:
            print("✅ All links valid!")
            print("="*80)
            sys.exit(0)
|
|
|
|
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the flags, then hand them to
    # LinkChecker, whose run() prints the report and exits the process.
    cli = argparse.ArgumentParser(description="Check markdown links in repository")
    cli.add_argument("--fix", action="store_true", help="Attempt to fix broken links")
    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    cli.add_argument("--repo", default=".", help="Repository root path")
    options = cli.parse_args()

    LinkChecker(
        repo_root=options.repo,
        fix=options.fix,
        verbose=options.verbose,
    ).run()
|