Add link validation script
Automated markdown link checker:
- Validates all internal links
- Checks relative/absolute paths
- Detects broken cross-references

Per Fresh Claude Review Priority #2: Add validation tooling.

Usage: python3 automation/check-links.py [--verbose]

Date: 2026-02-16
This commit is contained in:
138
automation/check-links.py
Normal file
138
automation/check-links.py
Normal file
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Markdown Link Checker for Firefrost Operations Manual
|
||||
|
||||
Validates all internal links in .md files to ensure:
|
||||
1. Referenced files exist
|
||||
2. Links use correct paths (relative or absolute)
|
||||
3. No broken cross-references
|
||||
|
||||
Usage:
|
||||
python3 check-links.py [--fix] [--verbose] [--repo PATH]
|
||||
|
||||
Options:
|
||||
--fix: Automatically fix common issues (flag is accepted, but no fix logic is implemented yet)
|
||||
--verbose, -v: Show detailed output
--repo PATH: Repository root to scan (default: current directory)
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Set
|
||||
|
||||
class LinkChecker:
    """Validate internal markdown links across a repository tree.

    Walks ``repo_root`` for ``.md`` files, extracts inline ``[text](target)``
    links, and records every non-external target that does not exist on disk.

    Results accumulate on the instance:
        broken_links:  ``(source, link, location)`` tuples, where ``source``
                       is the repo-relative path of the file containing the
                       link, ``link`` is the target exactly as written, and
                       ``location`` is the string ``"Line N"``.
        checked_files: repo-relative paths of every file given to
                       ``check_file``.
    """

    # Inline markdown link ``[text](target)``; compiled once for all files.
    _LINK_RE = re.compile(r'\[([^\]]+)\]\(([^\)]+)\)')
    # Destination followed by a quoted link title: ``path "Title"``.
    _TITLED_RE = re.compile(r'''^(\S+)\s+(?:"[^"]*"|'[^']*')$''')
    # Targets with these prefixes are not files in the repository.
    _SKIPPED_PREFIXES = ('http://', 'https://', '#', 'mailto:')

    def __init__(self, repo_root: str, fix: bool = False, verbose: bool = False):
        """Set up a checker rooted at ``repo_root``.

        Args:
            repo_root: Directory to scan; also the base for ``/``-prefixed links.
            fix: Stored on the instance. NOTE: no method of this class reads
                it, so passing ``True`` currently changes nothing.
            verbose: When true, print each file as it is checked and each
                broken link as it is found.
        """
        self.repo_root = Path(repo_root)
        self.fix = fix
        self.verbose = verbose
        self.broken_links: List[Tuple[str, str, str]] = []
        self.checked_files: Set[str] = set()

    def find_markdown_files(self) -> List[Path]:
        """Return every ``.md`` file under the repository root.

        Hidden directories (names starting with ``.``), ``node_modules`` and
        ``__pycache__`` are not descended into.
        """
        md_files = []
        for root, dirs, files in os.walk(self.repo_root):
            # Prune in place so os.walk never enters the excluded directories.
            dirs[:] = [
                d for d in dirs
                if not d.startswith('.') and d not in ['node_modules', '__pycache__']
            ]
            md_files.extend(Path(root) / name for name in files if name.endswith('.md'))
        return md_files

    def extract_links(self, content: str) -> List[Tuple[str, int]]:
        """Extract internal markdown links from ``content``.

        Returns:
            ``(link, line_number)`` pairs in document order. ``link`` is the
            text between the parentheses exactly as written; line numbers are
            1-based. Links starting with ``http://``, ``https://``, ``#`` or
            ``mailto:`` are skipped.
        """
        links = []
        for match in self._LINK_RE.finditer(content):
            link = match.group(2)
            if link.startswith(self._SKIPPED_PREFIXES):
                continue
            # Newlines before the match start give the zero-based line index.
            line_num = content.count('\n', 0, match.start()) + 1
            links.append((link, line_num))
        return links

    def resolve_link(self, source_file: Path, link: str) -> Path:
        """Resolve a link target to the file path it refers to.

        A trailing quoted title (``path "Title"``) and any ``#fragment`` are
        removed first: they address content inside the file, not the file
        itself, and would otherwise make a valid link look broken.

        Args:
            source_file: The markdown file that contains the link.
            link: The link target as written in the document.

        Returns:
            For a target starting with ``/``, the path under ``repo_root``;
            otherwise the target resolved relative to ``source_file``'s
            directory.
        """
        target = link.strip()
        titled = self._TITLED_RE.match(target)
        if titled:
            target = titled.group(1)
        target = target.split('#', 1)[0]

        if target.startswith('/'):
            # Absolute from repo root
            return self.repo_root / target.lstrip('/')
        # Relative to source file
        return (source_file.parent / target).resolve()

    def check_file(self, file_path: Path):
        """Check all links in a single file and record the broken ones.

        Adds the file to ``checked_files`` and appends one entry to
        ``broken_links`` for each link whose resolved target does not exist.
        In verbose mode each broken link is also printed immediately.
        """
        relative_name = str(file_path.relative_to(self.repo_root))
        self.checked_files.add(relative_name)

        # errors='replace' keeps one mis-encoded file from aborting the whole
        # run; link syntax is ASCII, so it survives the substitution.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()

        for link, line_num in self.extract_links(content):
            if self.resolve_link(file_path, link).exists():
                continue
            self.broken_links.append((relative_name, link, f"Line {line_num}"))
            if self.verbose:
                print(f" ❌ {file_path.name}:{line_num} -> {link} (NOT FOUND)")

    def run(self):
        """Run the link checker on all markdown files, then report.

        Does not return normally: ``report`` ends the process via ``sys.exit``.
        """
        print("🔍 Scanning for markdown files...")
        md_files = self.find_markdown_files()
        print(f"Found {len(md_files)} markdown files\n")

        print("🔗 Checking links...")
        for file_path in md_files:
            if self.verbose:
                print(f"\nChecking: {file_path.relative_to(self.repo_root)}")
            self.check_file(file_path)

        self.report()

    def report(self):
        """Print the summary of broken links and exit the process.

        Raises:
            SystemExit: always; exit status 1 when any broken link was
                recorded, 0 otherwise.
        """
        print("\n" + "="*80)
        print("📊 LINK CHECK REPORT")
        print("="*80)

        print(f"\nFiles checked: {len(self.checked_files)}")
        print(f"Broken links found: {len(self.broken_links)}\n")

        if self.broken_links:
            print("❌ BROKEN LINKS:")
            print("-" * 80)
            for source, link, location in self.broken_links:
                print(f"\n📄 {source}")
                print(f" {location}: {link}")
            print("\n" + "="*80)
            sys.exit(1)
        else:
            print("✅ All links valid!")
            print("="*80)
            sys.exit(0)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the flags, then scan and report.
    # LinkChecker.run() ends the process through report()'s sys.exit call.
    import argparse

    cli = argparse.ArgumentParser(description="Check markdown links in repository")
    # Each entry is (option strings, keyword arguments for add_argument).
    option_table = (
        (("--fix",), {"action": "store_true", "help": "Attempt to fix broken links"}),
        (("--verbose", "-v"), {"action": "store_true", "help": "Verbose output"}),
        (("--repo",), {"default": ".", "help": "Repository root path"}),
    )
    for option_strings, settings in option_table:
        cli.add_argument(*option_strings, **settings)

    opts = cli.parse_args()

    LinkChecker(repo_root=opts.repo, fix=opts.fix, verbose=opts.verbose).run()
|
||||
Reference in New Issue
Block a user