#!/usr/bin/env python3
"""
Markdown to PDF converter with Chinese font support.

Converts markdown files to PDF using weasyprint, with proper Chinese typography.
Designed for formal documents (trademark filings, legal documents, reports).

Usage:
    python md_to_pdf.py input.md output.pdf
    python md_to_pdf.py input.md              # outputs input.pdf

Requirements:
    pip install weasyprint markdown

macOS environment setup (if needed):
    export DYLD_LIBRARY_PATH="/opt/homebrew/lib:$DYLD_LIBRARY_PATH"
"""

import html
import os
import platform
import re
import sys
from pathlib import Path

# Auto-configure library path on macOS ARM (Homebrew) — must be before weasyprint import
if platform.system() == 'Darwin':
    _homebrew_lib = '/opt/homebrew/lib'
    if Path(_homebrew_lib).is_dir():
        _cur = os.environ.get('DYLD_LIBRARY_PATH', '')
        # Compare whole path entries; a substring test would be fooled by
        # entries such as '/opt/homebrew/lib64'.
        if _homebrew_lib not in _cur.split(':'):
            os.environ['DYLD_LIBRARY_PATH'] = f"{_homebrew_lib}:{_cur}" if _cur else _homebrew_lib

import markdown  # noqa: E402  (must follow the library-path setup above)
from weasyprint import CSS, HTML  # noqa: E402

# CSS with Chinese font support
CSS_STYLES = """
@page {
    size: A4;
    margin: 2.5cm 2cm;
}

body {
    font-family: 'Songti SC', 'SimSun', 'STSong', 'Noto Serif CJK SC', serif;
    font-size: 12pt;
    line-height: 1.8;
    color: #000;
    width: 100%;
}

h1 {
    font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
    font-size: 18pt;
    font-weight: bold;
    text-align: center;
    margin-top: 0;
    margin-bottom: 1.5em;
}

h2 {
    font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
    font-size: 14pt;
    font-weight: bold;
    margin-top: 1.5em;
    margin-bottom: 0.8em;
}

h3 {
    font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
    font-size: 12pt;
    font-weight: bold;
    margin-top: 1em;
    margin-bottom: 0.5em;
}

p {
    margin: 0.8em 0;
    text-align: justify;
}

ul, ol {
    margin: 0.8em 0;
    padding-left: 2em;
}

li {
    margin: 0.4em 0;
}

table {
    border-collapse: collapse;
    width: 100%;
    margin: 1em 0;
    font-size: 10pt;
    table-layout: fixed;
}

th, td {
    border: 1px solid #666;
    padding: 8px 6px;
    text-align: left;
    overflow-wrap: break-word;
    word-break: normal;
}

th {
    background-color: #f0f0f0;
    font-weight: bold;
}

hr {
    border: none;
    border-top: 1px solid #ccc;
    margin: 1.5em 0;
}

strong {
    font-weight: bold;
}

code {
    font-family: 'SF Mono', 'Monaco', 'Menlo', monospace;
    font-size: 10pt;
    background-color: #f5f5f5;
    padding: 0.2em 0.4em;
    border-radius: 3px;
}

pre {
    background-color: #f5f5f5;
    padding: 1em;
    overflow-x: auto;
    font-size: 10pt;
    line-height: 1.4;
    border-radius: 4px;
}

blockquote {
    border-left: 3px solid #ccc;
    margin: 1em 0;
    padding-left: 1em;
    color: #555;
}
"""

# A line that starts a bullet ("-", "*", "+") or numbered ("1.") list item.
_LIST_ITEM_RE = re.compile(r'^(\s*)([-*+]|\d+\.)\s')
# A line that opens a fenced code block: three or more backticks or tildes,
# optionally followed by an info string that contains no backticks (so an
# inline code span written with triple backticks is not mistaken for a fence).
_FENCE_RE = re.compile(r'^\s*(`{3,}|~{3,})[^`]*$')


def _ensure_list_spacing(text: str) -> str:
    """Ensure blank lines before list items for proper markdown parsing.

    The Python markdown library requires a blank line before a list when it
    follows a paragraph. Without it, list items render as plain text.

    Lines inside fenced code blocks are copied through untouched, so code
    samples that merely look like list items (for example YAML sequences)
    are not altered. An unclosed fence leaves the remainder of the text
    unchanged.

    Args:
        text: Raw markdown source.

    Returns:
        The markdown source with a blank line inserted before each list item
        that directly follows a non-blank, non-list line outside a fence.
    """
    lines = text.split('\n')
    result: list[str] = []
    open_fence = ''  # the fence marker that opened the current code block, if any

    for index, line in enumerate(lines):
        stripped = line.strip()

        if open_fence:
            result.append(line)
            # A closing fence is made only of the fence character and is at
            # least as long as the opening marker.
            if stripped.startswith(open_fence) and not stripped.strip(open_fence[0]):
                open_fence = ''
            continue

        fence_match = _FENCE_RE.match(line)
        if fence_match:
            open_fence = fence_match.group(1)
            result.append(line)
            continue

        if index > 0 and _LIST_ITEM_RE.match(line):
            prev = lines[index - 1]
            if prev.strip() and not _LIST_ITEM_RE.match(prev):
                result.append('')
        result.append(line)

    return '\n'.join(result)


def markdown_to_pdf(md_file: str, pdf_file: str | None = None) -> str:
    """
    Convert markdown file to PDF with Chinese font support.

    Args:
        md_file: Path to input markdown file
        pdf_file: Path to output PDF file (optional, defaults to same name as input)

    Returns:
        Path to generated PDF file
    """
    md_path = Path(md_file)
    if pdf_file is None:
        pdf_file = str(md_path.with_suffix('.pdf'))

    # Read and preprocess markdown content
    md_content = _ensure_list_spacing(md_path.read_text(encoding='utf-8'))

    # Convert to HTML
    html_content = markdown.markdown(
        md_content,
        extensions=['tables', 'fenced_code', 'codehilite', 'toc'],
    )

    # Create full HTML document. The file stem goes into <title> (escaped)
    # instead of being emitted as visible text at the top of the page.
    full_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{html.escape(md_path.stem)}</title>
</head>
<body>
{html_content}
</body>
</html>
"""

    # Generate PDF. base_url lets relative image/link paths in the markdown
    # resolve against the location of the input file.
    HTML(string=full_html, base_url=md_path.resolve().as_uri()).write_pdf(
        pdf_file, stylesheets=[CSS(string=CSS_STYLES)]
    )

    return pdf_file


def main() -> None:
    """Command-line entry point: convert the markdown file named in argv to PDF.

    Exits with status 1 when no input file is given or the input is not an
    existing regular file; otherwise prints the path of the generated PDF.
    """
    if len(sys.argv) < 2:
        print("Usage: python md_to_pdf.py input.md [output.pdf]", file=sys.stderr)
        print("\nConverts markdown to PDF with Chinese font support.", file=sys.stderr)
        sys.exit(1)

    md_file = sys.argv[1]
    pdf_file = sys.argv[2] if len(sys.argv) > 2 else None

    # is_file() also rejects directories, which exists() would let through.
    if not Path(md_file).is_file():
        print(f"Error: File not found: {md_file}", file=sys.stderr)
        sys.exit(1)

    output = markdown_to_pdf(md_file, pdf_file)
    print(f"Generated: {output}")


if __name__ == "__main__":
    main()