feat(pdf-creator): add theme system + Chrome backend; add terraform-skill draft
- pdf-creator v1.2.0: theme system (default/warm-terra), dual backend (weasyprint/chrome auto-detect), argparse CLI, extracted CSS to themes/ - terraform-skill: operational traps from real deployments (provisioner timing, DNS duplication, multi-env isolation, pre-deploy validation) - asr-transcribe-to-text: add security scan marker Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,150 +1,110 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Markdown to PDF converter with Chinese font support.
|
||||
Markdown to PDF converter with Chinese font support and theme system.
|
||||
|
||||
Converts markdown files to PDF using pandoc (markdown→HTML) + weasyprint (HTML→PDF).
|
||||
Designed for formal documents (trademark filings, legal documents, reports).
|
||||
Converts markdown files to PDF using:
|
||||
- pandoc (markdown → HTML)
|
||||
- weasyprint or headless Chrome (HTML → PDF), auto-detected
|
||||
|
||||
Usage:
|
||||
python md_to_pdf.py input.md output.pdf
|
||||
python md_to_pdf.py input.md # outputs input.pdf
|
||||
python md_to_pdf.py input.md --theme warm-terra
|
||||
python md_to_pdf.py input.md --theme default --backend chrome
|
||||
python md_to_pdf.py input.md # outputs input.pdf, default theme, auto backend
|
||||
|
||||
Themes:
|
||||
Stored in ../themes/*.css. Built-in themes:
|
||||
- default: Songti SC + black/grey, formal documents
|
||||
- warm-terra: PingFang SC + terra cotta, training/workshop materials
|
||||
|
||||
Requirements:
|
||||
pip install weasyprint
|
||||
pandoc (system install, e.g. brew install pandoc)
|
||||
|
||||
macOS environment setup (if needed):
|
||||
export DYLD_LIBRARY_PATH="/opt/homebrew/lib:$DYLD_LIBRARY_PATH"
|
||||
weasyprint (pip install weasyprint) OR Google Chrome (for --backend chrome)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
# Auto-configure library path on macOS ARM (Homebrew) — must be before weasyprint import
|
||||
if platform.system() == 'Darwin':
|
||||
_homebrew_lib = '/opt/homebrew/lib'
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
THEMES_DIR = SCRIPT_DIR.parent / "themes"
|
||||
|
||||
# macOS ARM: auto-configure library path for weasyprint
|
||||
if platform.system() == "Darwin":
|
||||
_homebrew_lib = "/opt/homebrew/lib"
|
||||
if Path(_homebrew_lib).is_dir():
|
||||
_cur = os.environ.get('DYLD_LIBRARY_PATH', '')
|
||||
_cur = os.environ.get("DYLD_LIBRARY_PATH", "")
|
||||
if _homebrew_lib not in _cur:
|
||||
os.environ['DYLD_LIBRARY_PATH'] = f"{_homebrew_lib}:{_cur}" if _cur else _homebrew_lib
|
||||
|
||||
from weasyprint import CSS, HTML
|
||||
os.environ["DYLD_LIBRARY_PATH"] = (
|
||||
f"{_homebrew_lib}:{_cur}" if _cur else _homebrew_lib
|
||||
)
|
||||
|
||||
|
||||
# CSS with Chinese font support
|
||||
CSS_STYLES = """
|
||||
@page {
|
||||
size: A4;
|
||||
margin: 2.5cm 2cm;
|
||||
}
|
||||
def _find_chrome() -> str | None:
|
||||
"""Find Chrome/Chromium binary path."""
|
||||
candidates = [
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Chromium.app/Contents/MacOS/Chromium",
|
||||
shutil.which("google-chrome"),
|
||||
shutil.which("chromium"),
|
||||
shutil.which("chrome"),
|
||||
]
|
||||
for c in candidates:
|
||||
if c and Path(c).exists():
|
||||
return str(c)
|
||||
return None
|
||||
|
||||
body {
|
||||
font-family: 'Songti SC', 'SimSun', 'STSong', 'Noto Serif CJK SC', serif;
|
||||
font-size: 12pt;
|
||||
line-height: 1.8;
|
||||
color: #000;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
|
||||
font-size: 18pt;
|
||||
font-weight: bold;
|
||||
text-align: center;
|
||||
margin-top: 0;
|
||||
margin-bottom: 1.5em;
|
||||
}
|
||||
def _has_weasyprint() -> bool:
|
||||
"""Check if weasyprint is importable."""
|
||||
try:
|
||||
import weasyprint # noqa: F401
|
||||
|
||||
h2 {
|
||||
font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
|
||||
font-size: 14pt;
|
||||
font-weight: bold;
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 0.8em;
|
||||
}
|
||||
return True
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
h3 {
|
||||
font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
|
||||
font-size: 12pt;
|
||||
font-weight: bold;
|
||||
margin-top: 1em;
|
||||
margin-bottom: 0.5em;
|
||||
}
|
||||
|
||||
p {
|
||||
margin: 0.8em 0;
|
||||
text-align: justify;
|
||||
}
|
||||
def _detect_backend() -> str:
|
||||
"""Auto-detect best available backend: weasyprint > chrome."""
|
||||
if _has_weasyprint():
|
||||
return "weasyprint"
|
||||
if _find_chrome():
|
||||
return "chrome"
|
||||
print(
|
||||
"Error: No PDF backend found. Install weasyprint (pip install weasyprint) "
|
||||
"or Google Chrome.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
ul, ol {
|
||||
margin: 0.8em 0;
|
||||
padding-left: 2em;
|
||||
}
|
||||
|
||||
li {
|
||||
margin: 0.4em 0;
|
||||
}
|
||||
def _load_theme(theme_name: str) -> str:
|
||||
"""Load CSS from themes directory."""
|
||||
theme_file = THEMES_DIR / f"{theme_name}.css"
|
||||
if not theme_file.exists():
|
||||
available = [f.stem for f in THEMES_DIR.glob("*.css")]
|
||||
print(
|
||||
f"Error: Theme '{theme_name}' not found. Available: {available}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
return theme_file.read_text(encoding="utf-8")
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin: 1em 0;
|
||||
font-size: 10pt;
|
||||
table-layout: fixed;
|
||||
}
|
||||
|
||||
th, td {
|
||||
border: 1px solid #666;
|
||||
padding: 8px 6px;
|
||||
text-align: left;
|
||||
overflow-wrap: break-word;
|
||||
word-break: normal;
|
||||
}
|
||||
|
||||
th {
|
||||
background-color: #f0f0f0;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: none;
|
||||
border-top: 1px solid #ccc;
|
||||
margin: 1.5em 0;
|
||||
}
|
||||
|
||||
strong {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
code {
|
||||
font-family: 'SF Mono', 'Monaco', 'Menlo', monospace;
|
||||
font-size: 10pt;
|
||||
background-color: #f5f5f5;
|
||||
padding: 0.2em 0.4em;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
pre {
|
||||
background-color: #f5f5f5;
|
||||
padding: 1em;
|
||||
overflow-x: auto;
|
||||
font-size: 10pt;
|
||||
line-height: 1.4;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
blockquote {
|
||||
border-left: 3px solid #ccc;
|
||||
margin: 1em 0;
|
||||
padding-left: 1em;
|
||||
color: #555;
|
||||
}
|
||||
"""
|
||||
def _list_themes() -> list[str]:
|
||||
"""List available theme names."""
|
||||
if not THEMES_DIR.exists():
|
||||
return []
|
||||
return sorted(f.stem for f in THEMES_DIR.glob("*.css"))
|
||||
|
||||
|
||||
def _ensure_list_spacing(text: str) -> str:
|
||||
@@ -152,39 +112,36 @@ def _ensure_list_spacing(text: str) -> str:
|
||||
|
||||
Both Python markdown library and pandoc require a blank line before a list
|
||||
when it follows a paragraph. Without it, list items render as plain text.
|
||||
|
||||
This preprocessor adds blank lines before list items when needed, without
|
||||
modifying the user's original markdown file.
|
||||
"""
|
||||
lines = text.split('\n')
|
||||
lines = text.split("\n")
|
||||
result = []
|
||||
list_re = re.compile(r'^(\s*)([-*+]|\d+\.)\s')
|
||||
list_re = re.compile(r"^(\s*)([-*+]|\d+\.)\s")
|
||||
for i, line in enumerate(lines):
|
||||
if i > 0 and list_re.match(line):
|
||||
prev = lines[i - 1]
|
||||
if prev.strip() and not list_re.match(prev):
|
||||
result.append('')
|
||||
result.append("")
|
||||
result.append(line)
|
||||
return '\n'.join(result)
|
||||
return "\n".join(result)
|
||||
|
||||
|
||||
def _md_to_html(md_file: str) -> str:
|
||||
"""Convert markdown to HTML using pandoc with list spacing preprocessing.
|
||||
|
||||
Reads the markdown file, preprocesses it to ensure proper list spacing,
|
||||
then passes the content to pandoc via stdin. The original file is not modified.
|
||||
"""
|
||||
if not shutil.which('pandoc'):
|
||||
print("Error: pandoc not found. Install with: brew install pandoc", file=sys.stderr)
|
||||
"""Convert markdown to HTML using pandoc with list spacing preprocessing."""
|
||||
if not shutil.which("pandoc"):
|
||||
print(
|
||||
"Error: pandoc not found. Install with: brew install pandoc",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# Read and preprocess markdown to ensure list spacing
|
||||
md_content = Path(md_file).read_text(encoding='utf-8')
|
||||
md_content = Path(md_file).read_text(encoding="utf-8")
|
||||
md_content = _ensure_list_spacing(md_content)
|
||||
|
||||
result = subprocess.run(
|
||||
['pandoc', '-f', 'markdown', '-t', 'html'],
|
||||
input=md_content, capture_output=True, text=True,
|
||||
["pandoc", "-f", "markdown", "-t", "html"],
|
||||
input=md_content,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f"Error: pandoc failed: {result.stderr}", file=sys.stderr)
|
||||
@@ -193,58 +150,152 @@ def _md_to_html(md_file: str) -> str:
|
||||
return result.stdout
|
||||
|
||||
|
||||
def markdown_to_pdf(md_file: str, pdf_file: str | None = None) -> str:
|
||||
"""
|
||||
Convert markdown file to PDF with Chinese font support.
|
||||
|
||||
Args:
|
||||
md_file: Path to input markdown file
|
||||
pdf_file: Path to output PDF file (optional, defaults to same name as input)
|
||||
|
||||
Returns:
|
||||
Path to generated PDF file
|
||||
"""
|
||||
md_path = Path(md_file)
|
||||
|
||||
if pdf_file is None:
|
||||
pdf_file = str(md_path.with_suffix('.pdf'))
|
||||
|
||||
# Convert to HTML via pandoc
|
||||
html_content = _md_to_html(md_file)
|
||||
|
||||
# Create full HTML document
|
||||
full_html = f"""<!DOCTYPE html>
|
||||
def _build_full_html(html_content: str, css: str, title: str) -> str:
|
||||
"""Wrap HTML content in a full document with CSS."""
|
||||
return f"""<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>{md_path.stem}</title>
|
||||
<title>{title}</title>
|
||||
<style>{css}</style>
|
||||
</head>
|
||||
<body>
|
||||
{html_content}
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
# Generate PDF
|
||||
HTML(string=full_html).write_pdf(pdf_file, stylesheets=[CSS(string=CSS_STYLES)])
|
||||
|
||||
def _render_weasyprint(full_html: str, pdf_file: str, css: str) -> None:
|
||||
"""Render PDF using weasyprint."""
|
||||
from weasyprint import CSS, HTML
|
||||
|
||||
HTML(string=full_html).write_pdf(pdf_file, stylesheets=[CSS(string=css)])
|
||||
|
||||
|
||||
def _render_chrome(full_html: str, pdf_file: str) -> None:
|
||||
"""Render PDF using headless Chrome."""
|
||||
chrome = _find_chrome()
|
||||
if not chrome:
|
||||
print("Error: Chrome not found.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
suffix=".html", mode="w", encoding="utf-8", delete=False
|
||||
) as f:
|
||||
f.write(full_html)
|
||||
html_path = f.name
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[
|
||||
chrome,
|
||||
"--headless",
|
||||
"--disable-gpu",
|
||||
"--no-pdf-header-footer",
|
||||
f"--print-to-pdf={pdf_file}",
|
||||
html_path,
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if not Path(pdf_file).exists():
|
||||
print(
|
||||
f"Error: Chrome failed to generate PDF. stderr: {result.stderr}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
finally:
|
||||
Path(html_path).unlink(missing_ok=True)
|
||||
|
||||
|
||||
def markdown_to_pdf(
|
||||
md_file: str,
|
||||
pdf_file: str | None = None,
|
||||
theme: str = "default",
|
||||
backend: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Convert markdown file to PDF.
|
||||
|
||||
Args:
|
||||
md_file: Path to input markdown file
|
||||
pdf_file: Path to output PDF (optional, defaults to same name as input)
|
||||
theme: Theme name (from themes/ directory)
|
||||
backend: 'weasyprint', 'chrome', or None (auto-detect)
|
||||
|
||||
Returns:
|
||||
Path to generated PDF file
|
||||
"""
|
||||
md_path = Path(md_file)
|
||||
if pdf_file is None:
|
||||
pdf_file = str(md_path.with_suffix(".pdf"))
|
||||
|
||||
if backend is None:
|
||||
backend = _detect_backend()
|
||||
|
||||
css = _load_theme(theme)
|
||||
html_content = _md_to_html(md_file)
|
||||
full_html = _build_full_html(html_content, css, md_path.stem)
|
||||
|
||||
if backend == "weasyprint":
|
||||
_render_weasyprint(full_html, pdf_file, css)
|
||||
elif backend == "chrome":
|
||||
_render_chrome(full_html, pdf_file)
|
||||
else:
|
||||
print(f"Error: Unknown backend '{backend}'", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
size_kb = Path(pdf_file).stat().st_size / 1024
|
||||
print(f"Generated: {pdf_file} ({size_kb:.0f}KB, theme={theme}, backend={backend})")
|
||||
return pdf_file
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: python md_to_pdf.py <input.md> [output.pdf]")
|
||||
print("\nConverts markdown to PDF with Chinese font support.")
|
||||
available_themes = _list_themes()
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Markdown to PDF with Chinese font support and themes."
|
||||
)
|
||||
parser.add_argument("input", help="Input markdown file")
|
||||
parser.add_argument("output", nargs="?", help="Output PDF file (optional)")
|
||||
parser.add_argument(
|
||||
"--theme",
|
||||
default="default",
|
||||
choices=available_themes or ["default"],
|
||||
help=f"CSS theme (available: {', '.join(available_themes) or 'default'})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--backend",
|
||||
choices=["weasyprint", "chrome"],
|
||||
default=None,
|
||||
help="PDF rendering backend (default: auto-detect)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--list-themes",
|
||||
action="store_true",
|
||||
help="List available themes and exit",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.list_themes:
|
||||
for t in available_themes:
|
||||
marker = " (default)" if t == "default" else ""
|
||||
css_file = THEMES_DIR / f"{t}.css"
|
||||
first_line = ""
|
||||
for line in css_file.read_text().splitlines():
|
||||
line = line.strip()
|
||||
if line.startswith("*") and "—" in line:
|
||||
first_line = line.lstrip("* ").strip()
|
||||
break
|
||||
print(f" {t}{marker}: {first_line}")
|
||||
sys.exit(0)
|
||||
|
||||
if not Path(args.input).exists():
|
||||
print(f"Error: File not found: {args.input}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
md_file = sys.argv[1]
|
||||
pdf_file = sys.argv[2] if len(sys.argv) > 2 else None
|
||||
|
||||
if not Path(md_file).exists():
|
||||
print(f"Error: File not found: {md_file}")
|
||||
sys.exit(1)
|
||||
|
||||
output = markdown_to_pdf(md_file, pdf_file)
|
||||
print(f"Generated: {output}")
|
||||
markdown_to_pdf(args.input, args.output, args.theme, args.backend)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user