feat(pdf-creator): add theme system + Chrome backend; add terraform-skill draft

- pdf-creator v1.2.0: theme system (default/warm-terra), dual backend (weasyprint/chrome auto-detect), argparse CLI, extracted CSS to themes/
- terraform-skill: operational traps from real deployments (provisioner timing, DNS duplication, multi-env isolation, pre-deploy validation)
- asr-transcribe-to-text: add security scan marker

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 23:33:03 +08:00
parent b9facf3516
commit 87221d94d5
10 changed files with 1091 additions and 207 deletions
--- a/pdf-creator/scripts/md_to_pdf.py
+++ b/pdf-creator/scripts/md_to_pdf.py
@@ -1,150 +1,110 @@
 #!/usr/bin/env python3
 """
-Markdown to PDF converter with Chinese font support.
+Markdown to PDF converter with Chinese font support and theme system.

-Converts markdown files to PDF using pandoc (markdown→HTML) + weasyprint (HTML→PDF).
-Designed for formal documents (trademark filings, legal documents, reports).
+Converts markdown files to PDF using:
+  - pandoc (markdown → HTML)
+  - weasyprint or headless Chrome (HTML → PDF), auto-detected

 Usage:
    python md_to_pdf.py input.md output.pdf
-    python md_to_pdf.py input.md  # outputs input.pdf
+    python md_to_pdf.py input.md --theme warm-terra
+    python md_to_pdf.py input.md --theme default --backend chrome
+    python md_to_pdf.py input.md  # outputs input.pdf, default theme, auto backend
+
+Themes:
+    Stored in ../themes/*.css. Built-in themes:
+    - default:     Songti SC + black/grey, formal documents
+    - warm-terra:  PingFang SC + terra cotta, training/workshop materials

 Requirements:
-    pip install weasyprint
    pandoc (system install, e.g. brew install pandoc)
-
-    macOS environment setup (if needed):
-    export DYLD_LIBRARY_PATH="/opt/homebrew/lib:$DYLD_LIBRARY_PATH"
+    weasyprint (pip install weasyprint) OR Google Chrome (for --backend chrome)
 """

+from __future__ import annotations
+
+import argparse
 import os
 import platform
 import re
 import shutil
 import subprocess
 import sys
+import tempfile
 from pathlib import Path

-# Auto-configure library path on macOS ARM (Homebrew) — must be before weasyprint import
-if platform.system() == 'Darwin':
-    _homebrew_lib = '/opt/homebrew/lib'
+SCRIPT_DIR = Path(__file__).resolve().parent
+THEMES_DIR = SCRIPT_DIR.parent / "themes"
+
+# macOS ARM: auto-configure library path for weasyprint
+if platform.system() == "Darwin":
+    _homebrew_lib = "/opt/homebrew/lib"
    if Path(_homebrew_lib).is_dir():
-        _cur = os.environ.get('DYLD_LIBRARY_PATH', '')
+        _cur = os.environ.get("DYLD_LIBRARY_PATH", "")
        if _homebrew_lib not in _cur:
-            os.environ['DYLD_LIBRARY_PATH'] = f"{_homebrew_lib}:{_cur}" if _cur else _homebrew_lib
-
-from weasyprint import CSS, HTML
+            os.environ["DYLD_LIBRARY_PATH"] = (
+                f"{_homebrew_lib}:{_cur}" if _cur else _homebrew_lib
+            )


-# CSS with Chinese font support
-CSS_STYLES = """
-@page {
-    size: A4;
-    margin: 2.5cm 2cm;
-}
+def _find_chrome() -> str | None:
+    """Find Chrome/Chromium binary path."""
+    candidates = [
+        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+        "/Applications/Chromium.app/Contents/MacOS/Chromium",
+        shutil.which("google-chrome"),
+        shutil.which("chromium"),
+        shutil.which("chrome"),
+    ]
+    for c in candidates:
+        if c and Path(c).exists():
+            return str(c)
+    return None

-body {
-    font-family: 'Songti SC', 'SimSun', 'STSong', 'Noto Serif CJK SC', serif;
-    font-size: 12pt;
-    line-height: 1.8;
-    color: #000;
-    width: 100%;
-}

-h1 {
-    font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
-    font-size: 18pt;
-    font-weight: bold;
-    text-align: center;
-    margin-top: 0;
-    margin-bottom: 1.5em;
-}
+def _has_weasyprint() -> bool:
+    """Check if weasyprint is importable."""
+    try:
+        import weasyprint  # noqa: F401

-h2 {
-    font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
-    font-size: 14pt;
-    font-weight: bold;
-    margin-top: 1.5em;
-    margin-bottom: 0.8em;
-}
+        return True
+    except ImportError:
+        return False

-h3 {
-    font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
-    font-size: 12pt;
-    font-weight: bold;
-    margin-top: 1em;
-    margin-bottom: 0.5em;
-}

-p {
-    margin: 0.8em 0;
-    text-align: justify;
-}
+def _detect_backend() -> str:
+    """Auto-detect best available backend: weasyprint > chrome."""
+    if _has_weasyprint():
+        return "weasyprint"
+    if _find_chrome():
+        return "chrome"
+    print(
+        "Error: No PDF backend found. Install weasyprint (pip install weasyprint) "
+        "or Google Chrome.",
+        file=sys.stderr,
+    )
+    sys.exit(1)

-ul, ol {
-    margin: 0.8em 0;
-    padding-left: 2em;
-}

-li {
-    margin: 0.4em 0;
-}
+def _load_theme(theme_name: str) -> str:
+    """Load CSS from themes directory."""
+    theme_file = THEMES_DIR / f"{theme_name}.css"
+    if not theme_file.exists():
+        available = [f.stem for f in THEMES_DIR.glob("*.css")]
+        print(
+            f"Error: Theme '{theme_name}' not found. Available: {available}",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+    return theme_file.read_text(encoding="utf-8")

-table {
-    border-collapse: collapse;
-    width: 100%;
-    margin: 1em 0;
-    font-size: 10pt;
-    table-layout: fixed;
-}

-th, td {
-    border: 1px solid #666;
-    padding: 8px 6px;
-    text-align: left;
-    overflow-wrap: break-word;
-    word-break: normal;
-}
-
-th {
-    background-color: #f0f0f0;
-    font-weight: bold;
-}
-
-hr {
-    border: none;
-    border-top: 1px solid #ccc;
-    margin: 1.5em 0;
-}
-
-strong {
-    font-weight: bold;
-}
-
-code {
-    font-family: 'SF Mono', 'Monaco', 'Menlo', monospace;
-    font-size: 10pt;
-    background-color: #f5f5f5;
-    padding: 0.2em 0.4em;
-    border-radius: 3px;
-}
-
-pre {
-    background-color: #f5f5f5;
-    padding: 1em;
-    overflow-x: auto;
-    font-size: 10pt;
-    line-height: 1.4;
-    border-radius: 4px;
-}
-
-blockquote {
-    border-left: 3px solid #ccc;
-    margin: 1em 0;
-    padding-left: 1em;
-    color: #555;
-}
-"""
+def _list_themes() -> list[str]:
+    """List available theme names."""
+    if not THEMES_DIR.exists():
+        return []
+    return sorted(f.stem for f in THEMES_DIR.glob("*.css"))


 def _ensure_list_spacing(text: str) -> str:
@@ -152,39 +112,36 @@ def _ensure_list_spacing(text: str) -> str:

    Both Python markdown library and pandoc require a blank line before a list
    when it follows a paragraph. Without it, list items render as plain text.
-
-    This preprocessor adds blank lines before list items when needed, without
-    modifying the user's original markdown file.
    """
-    lines = text.split('\n')
+    lines = text.split("\n")
    result = []
-    list_re = re.compile(r'^(\s*)([-*+]|\d+\.)\s')
+    list_re = re.compile(r"^(\s*)([-*+]|\d+\.)\s")
    for i, line in enumerate(lines):
        if i > 0 and list_re.match(line):
            prev = lines[i - 1]
            if prev.strip() and not list_re.match(prev):
-                result.append('')
+                result.append("")
        result.append(line)
-    return '\n'.join(result)
+    return "\n".join(result)


 def _md_to_html(md_file: str) -> str:
-    """Convert markdown to HTML using pandoc with list spacing preprocessing.
-
-    Reads the markdown file, preprocesses it to ensure proper list spacing,
-    then passes the content to pandoc via stdin. The original file is not modified.
-    """
-    if not shutil.which('pandoc'):
-        print("Error: pandoc not found. Install with: brew install pandoc", file=sys.stderr)
+    """Convert markdown to HTML using pandoc with list spacing preprocessing."""
+    if not shutil.which("pandoc"):
+        print(
+            "Error: pandoc not found. Install with: brew install pandoc",
+            file=sys.stderr,
+        )
        sys.exit(1)

-    # Read and preprocess markdown to ensure list spacing
-    md_content = Path(md_file).read_text(encoding='utf-8')
+    md_content = Path(md_file).read_text(encoding="utf-8")
    md_content = _ensure_list_spacing(md_content)

    result = subprocess.run(
-        ['pandoc', '-f', 'markdown', '-t', 'html'],
-        input=md_content, capture_output=True, text=True,
+        ["pandoc", "-f", "markdown", "-t", "html"],
+        input=md_content,
+        capture_output=True,
+        text=True,
    )
    if result.returncode != 0:
        print(f"Error: pandoc failed: {result.stderr}", file=sys.stderr)
@@ -193,58 +150,152 @@ def _md_to_html(md_file: str) -> str:
    return result.stdout


-def markdown_to_pdf(md_file: str, pdf_file: str | None = None) -> str:
-    """
-    Convert markdown file to PDF with Chinese font support.
-
-    Args:
-        md_file: Path to input markdown file
-        pdf_file: Path to output PDF file (optional, defaults to same name as input)
-
-    Returns:
-        Path to generated PDF file
-    """
-    md_path = Path(md_file)
-
-    if pdf_file is None:
-        pdf_file = str(md_path.with_suffix('.pdf'))
-
-    # Convert to HTML via pandoc
-    html_content = _md_to_html(md_file)
-
-    # Create full HTML document
-    full_html = f"""<!DOCTYPE html>
+def _build_full_html(html_content: str, css: str, title: str) -> str:
+    """Wrap HTML content in a full document with CSS."""
+    return f"""<!DOCTYPE html>
 <html lang="zh-CN">
 <head>
    <meta charset="UTF-8">
-    <title>{md_path.stem}</title>
+    <title>{title}</title>
+    <style>{css}</style>
 </head>
 <body>
 {html_content}
 </body>
 </html>"""

-    # Generate PDF
-    HTML(string=full_html).write_pdf(pdf_file, stylesheets=[CSS(string=CSS_STYLES)])

+def _render_weasyprint(full_html: str, pdf_file: str, css: str) -> None:
+    """Render PDF using weasyprint."""
+    from weasyprint import CSS, HTML
+
+    HTML(string=full_html).write_pdf(pdf_file, stylesheets=[CSS(string=css)])
+
+
+def _render_chrome(full_html: str, pdf_file: str) -> None:
+    """Render PDF using headless Chrome."""
+    chrome = _find_chrome()
+    if not chrome:
+        print("Error: Chrome not found.", file=sys.stderr)
+        sys.exit(1)
+
+    with tempfile.NamedTemporaryFile(
+        suffix=".html", mode="w", encoding="utf-8", delete=False
+    ) as f:
+        f.write(full_html)
+        html_path = f.name
+
+    try:
+        result = subprocess.run(
+            [
+                chrome,
+                "--headless",
+                "--disable-gpu",
+                "--no-pdf-header-footer",
+                f"--print-to-pdf={pdf_file}",
+                html_path,
+            ],
+            capture_output=True,
+            text=True,
+        )
+        if not Path(pdf_file).exists():
+            print(
+                f"Error: Chrome failed to generate PDF. stderr: {result.stderr}",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+    finally:
+        Path(html_path).unlink(missing_ok=True)
+
+
+def markdown_to_pdf(
+    md_file: str,
+    pdf_file: str | None = None,
+    theme: str = "default",
+    backend: str | None = None,
+) -> str:
+    """
+    Convert markdown file to PDF.
+
+    Args:
+        md_file: Path to input markdown file
+        pdf_file: Path to output PDF (optional, defaults to same name as input)
+        theme: Theme name (from themes/ directory)
+        backend: 'weasyprint', 'chrome', or None (auto-detect)
+
+    Returns:
+        Path to generated PDF file
+    """
+    md_path = Path(md_file)
+    if pdf_file is None:
+        pdf_file = str(md_path.with_suffix(".pdf"))
+
+    if backend is None:
+        backend = _detect_backend()
+
+    css = _load_theme(theme)
+    html_content = _md_to_html(md_file)
+    full_html = _build_full_html(html_content, css, md_path.stem)
+
+    if backend == "weasyprint":
+        _render_weasyprint(full_html, pdf_file, css)
+    elif backend == "chrome":
+        _render_chrome(full_html, pdf_file)
+    else:
+        print(f"Error: Unknown backend '{backend}'", file=sys.stderr)
+        sys.exit(1)
+
+    size_kb = Path(pdf_file).stat().st_size / 1024
+    print(f"Generated: {pdf_file} ({size_kb:.0f}KB, theme={theme}, backend={backend})")
    return pdf_file


 def main():
-    if len(sys.argv) < 2:
-        print("Usage: python md_to_pdf.py <input.md> [output.pdf]")
-        print("\nConverts markdown to PDF with Chinese font support.")
+    available_themes = _list_themes()
+
+    parser = argparse.ArgumentParser(
+        description="Markdown to PDF with Chinese font support and themes."
+    )
+    parser.add_argument("input", help="Input markdown file")
+    parser.add_argument("output", nargs="?", help="Output PDF file (optional)")
+    parser.add_argument(
+        "--theme",
+        default="default",
+        choices=available_themes or ["default"],
+        help=f"CSS theme (available: {', '.join(available_themes) or 'default'})",
+    )
+    parser.add_argument(
+        "--backend",
+        choices=["weasyprint", "chrome"],
+        default=None,
+        help="PDF rendering backend (default: auto-detect)",
+    )
+    parser.add_argument(
+        "--list-themes",
+        action="store_true",
+        help="List available themes and exit",
+    )
+
+    args = parser.parse_args()
+
+    if args.list_themes:
+        for t in available_themes:
+            marker = " (default)" if t == "default" else ""
+            css_file = THEMES_DIR / f"{t}.css"
+            first_line = ""
+            for line in css_file.read_text().splitlines():
+                line = line.strip()
+                if line.startswith("*") and "—" in line:
+                    first_line = line.lstrip("* ").strip()
+                    break
+            print(f"  {t}{marker}: {first_line}")
+        sys.exit(0)
+
+    if not Path(args.input).exists():
+        print(f"Error: File not found: {args.input}", file=sys.stderr)
        sys.exit(1)

-    md_file = sys.argv[1]
-    pdf_file = sys.argv[2] if len(sys.argv) > 2 else None
-
-    if not Path(md_file).exists():
-        print(f"Error: File not found: {md_file}")
-        sys.exit(1)
-
-    output = markdown_to_pdf(md_file, pdf_file)
-    print(f"Generated: {output}")
+    markdown_to_pdf(args.input, args.output, args.theme, args.backend)


 if __name__ == "__main__":