Add link validation script

Automated markdown link checker:
- Validates all internal links
- Checks relative/absolute paths
- Detects broken cross-references

Per Fresh Claude Review Priority #2: Add validation tooling.

Usage: python3 automation/check-links.py [--verbose]

Date: 2026-02-16
This commit is contained in:
2026-02-16 07:05:33 -06:00
parent d8f1139cf1
commit f3b45a0160

138
automation/check-links.py Normal file
View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""
Markdown Link Checker for Firefrost Operations Manual
Validates all internal links in .md files to ensure:
1. Referenced files exist
2. Links use correct paths (relative or absolute)
3. No broken cross-references
Usage:
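    python3 check-links.py [--repo PATH] [--fix] [--verbose]
Options:
    --repo PATH: Repository root to scan (default: current directory)
    --fix: Accepted but currently has no effect (automatic fixing is not implemented)
    --verbose: Show detailed output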
"""
import os
import re
import sys
from pathlib import Path
from typing import List, Tuple, Set
from urllib.parse import unquote
class LinkChecker:
    """Scan a repository's Markdown files and report internal links whose target is missing.

    Typical use is ``LinkChecker(root).run()``, which walks the tree, checks
    every ``.md`` file, prints a report and terminates the process with exit
    status 1 when at least one broken link was found (0 otherwise).

    Attributes:
        repo_root: Root directory that is scanned; links starting with ``/``
            are resolved against it.
        fix: Stored from the constructor; no method of this class reads it.
        verbose: When true, progress and each broken link are printed as found.
        broken_links: ``(source file relative to repo_root, link text, "Line N")``
            for every link whose target does not exist.
        checked_files: Repo-relative paths of every file passed to ``check_file``.
    """

    # Inline Markdown link ``[text](destination)``; compiled once, reused for every file.
    _LINK_RE = re.compile(r'\[([^\]]+)\]\(([^\)]+)\)')
    # Destinations that point outside the repository and are never checked on disk:
    # anything of the form ``scheme://`` plus the common "opaque" schemes.
    _EXTERNAL_RE = re.compile(
        r'^[a-z][a-z0-9+.\-]*://|^(?:mailto|tel|sms|data|javascript):',
        re.IGNORECASE,
    )
    # Optional link title at the end of a destination: ``path "Title"`` or ``path 'Title'``.
    _TITLE_RE = re.compile(r'''\s+(?:"[^"]*"|'[^']*')\s*$''')

    def __init__(self, repo_root: str, fix: bool = False, verbose: bool = False):
        """Create a checker for the tree rooted at *repo_root*.

        Args:
            repo_root: Path of the repository root to scan.
            fix: Kept for interface compatibility; currently unused by the checker.
            verbose: Print per-file progress and each broken link as it is found.
        """
        self.repo_root = Path(repo_root)
        self.fix = fix
        self.verbose = verbose
        self.broken_links: List[Tuple[str, str, str]] = []
        self.checked_files: Set[str] = set()

    def find_markdown_files(self) -> List[Path]:
        """Return every ``.md`` file under ``repo_root`` in a stable, sorted order.

        Hidden directories (names starting with ``.``) as well as
        ``node_modules`` and ``__pycache__`` are not descended into.
        """
        md_files = []
        for root, dirs, files in os.walk(self.repo_root):
            # Prune in place so os.walk does not descend into excluded
            # directories; sorting keeps the traversal order deterministic.
            dirs[:] = sorted(
                d for d in dirs
                if not d.startswith('.') and d not in ('node_modules', '__pycache__')
            )
            md_files.extend(
                Path(root) / name for name in sorted(files) if name.endswith('.md')
            )
        return md_files

    def extract_links(self, content: str) -> List[Tuple[str, int]]:
        """Extract the internal links of a Markdown document.

        Args:
            content: Full text of the document.

        Returns:
            ``(link, line_number)`` pairs, in document order, for every
            ``[text](link)`` whose destination is neither a pure ``#anchor``
            nor an external URI. ``link`` is the text exactly as written
            between the parentheses; line numbers are 1-based.
        """
        links = []
        for match in self._LINK_RE.finditer(content):
            link = match.group(2)
            destination = self._link_destination(link)
            if destination.startswith('#') or self._EXTERNAL_RE.match(destination):
                continue
            line_num = content.count('\n', 0, match.start()) + 1
            links.append((link, line_num))
        return links

    def _link_destination(self, link: str) -> str:
        """Return the destination of *link* without ``<...>`` wrapping or a trailing title."""
        destination = link.strip()
        if destination.startswith('<'):
            closing = destination.find('>')
            if closing != -1:
                return destination[1:closing]
        return self._TITLE_RE.sub('', destination)

    def _resolve_path(self, source_file: Path, path_text: str) -> Path:
        """Map one path spelling to a filesystem path (``/`` = repo root, else relative)."""
        if path_text.startswith('/'):
            # Absolute from repo root
            return self.repo_root / path_text.lstrip('/')
        # Relative to source file
        return (source_file.parent / path_text).resolve()

    def resolve_link(self, source_file: Path, link: str) -> Path:
        """Resolve a link found in *source_file* to the filesystem path it refers to.

        The ``#fragment`` and ``?query`` parts, an optional quoted title and
        percent-encoding are removed before the lookup, because none of them
        are part of the file name on disk. The less-processed spellings are
        tried afterwards, so a file whose name literally contains ``#``, ``?``
        or ``%`` is still found.

        Args:
            source_file: The Markdown file containing the link.
            link: Link text as returned by ``extract_links``.

        Returns:
            The first candidate path that exists, or the preferred (fully
            cleaned) candidate when none exists. A destination without a path
            part (for example ``?q=1``) resolves to the source file's directory.
        """
        destination = self._link_destination(link)
        path_text = destination.split('#', 1)[0].split('?', 1)[0]
        # dict.fromkeys() removes duplicate spellings while keeping priority order.
        spellings = dict.fromkeys((unquote(path_text), path_text, destination, link))
        candidates = [self._resolve_path(source_file, text) for text in spellings]
        for candidate in candidates:
            if candidate.exists():
                return candidate
        return candidates[0]

    def check_file(self, file_path: Path):
        """Check all links in a single file and record the broken ones.

        Args:
            file_path: Markdown file located under ``repo_root``.

        Side effects:
            Adds the file to ``checked_files`` and appends one entry to
            ``broken_links`` per link whose target does not exist.
        """
        relative_name = str(file_path.relative_to(self.repo_root))
        self.checked_files.add(relative_name)
        # errors='replace': a stray non-UTF-8 byte must not abort the whole
        # scan; replacement characters do not affect line counting.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()
        for link, line_num in self.extract_links(content):
            if self.resolve_link(file_path, link).exists():
                continue
            self.broken_links.append((relative_name, link, f"Line {line_num}"))
            if self.verbose:
                print(f"{file_path.name}:{line_num} -> {link} (NOT FOUND)")

    def run(self):
        """Run link checker on all markdown files, then print the report.

        Does not return normally: ``report()`` ends the process via ``sys.exit``.
        """
        print("🔍 Scanning for markdown files...")
        md_files = self.find_markdown_files()
        print(f"Found {len(md_files)} markdown files\n")
        print("🔗 Checking links...")
        for file_path in md_files:
            if self.verbose:
                print(f"\nChecking: {file_path.relative_to(self.repo_root)}")
            self.check_file(file_path)
        self.report()

    def report(self):
        """Print the summary of broken links and exit.

        Raises:
            SystemExit: always; status 1 if any broken link was recorded,
                status 0 otherwise.
        """
        print("\n" + "="*80)
        print("📊 LINK CHECK REPORT")
        print("="*80)
        print(f"\nFiles checked: {len(self.checked_files)}")
        print(f"Broken links found: {len(self.broken_links)}\n")
        if self.broken_links:
            print("❌ BROKEN LINKS:")
            print("-" * 80)
            for source, link, location in self.broken_links:
                print(f"\n📄 {source}")
                print(f" {location}: {link}")
            print("\n" + "="*80)
            sys.exit(1)
        else:
            print("✅ All links valid!")
            print("="*80)
            sys.exit(0)
if __name__ == "__main__":
    # Command-line entry point: parse options, validate them, run the checker.
    # LinkChecker.run() ends the process itself (exit status 1 when broken
    # links were found, 0 otherwise).
    import argparse

    parser = argparse.ArgumentParser(description="Check markdown links in repository")
    parser.add_argument(
        "--fix",
        action="store_true",
        help="Reserved: automatic fixing is not implemented yet",
    )
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--repo", default=".", help="Repository root path")
    args = parser.parse_args()

    # os.walk() yields nothing for a missing path, which would otherwise end
    # in a misleading "All links valid!" with exit status 0.
    if not Path(args.repo).is_dir():
        parser.error(f"--repo path is not a directory: {args.repo}")

    # The flag is passed through to LinkChecker, but nothing acts on it;
    # say so instead of silently pretending to fix links.
    if args.fix:
        print("warning: --fix is not implemented; no files will be modified", file=sys.stderr)

    checker = LinkChecker(
        repo_root=args.repo,
        fix=args.fix,
        verbose=args.verbose
    )
    checker.run()