feat(pdf-creator): add theme system + Chrome backend; add terraform-skill draft

- pdf-creator v1.2.0: theme system (default/warm-terra), dual backend
  (weasyprint/chrome auto-detect), argparse CLI, extracted CSS to themes/
- terraform-skill: operational traps from real deployments (provisioner
  timing, DNS duplication, multi-env isolation, pre-deploy validation)
- asr-transcribe-to-text: add security scan marker

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
daymade
2026-04-02 23:33:03 +08:00
parent b9facf3516
commit 87221d94d5
10 changed files with 1091 additions and 207 deletions

View File

@@ -1,150 +1,110 @@
#!/usr/bin/env python3
"""
Markdown to PDF converter with Chinese font support.
Markdown to PDF converter with Chinese font support and theme system.
Converts markdown files to PDF using pandoc (markdown→HTML) + weasyprint (HTML→PDF).
Designed for formal documents (trademark filings, legal documents, reports).
Converts markdown files to PDF using:
- pandoc (markdown → HTML)
- weasyprint or headless Chrome (HTML → PDF), auto-detected
Usage:
python md_to_pdf.py input.md output.pdf
python md_to_pdf.py input.md # outputs input.pdf
python md_to_pdf.py input.md --theme warm-terra
python md_to_pdf.py input.md --theme default --backend chrome
python md_to_pdf.py input.md # outputs input.pdf, default theme, auto backend
Themes:
Stored in ../themes/*.css. Built-in themes:
- default: Songti SC + black/grey, formal documents
- warm-terra: PingFang SC + terra cotta, training/workshop materials
Requirements:
pip install weasyprint
pandoc (system install, e.g. brew install pandoc)
macOS environment setup (if needed):
export DYLD_LIBRARY_PATH="/opt/homebrew/lib:$DYLD_LIBRARY_PATH"
weasyprint (pip install weasyprint) OR Google Chrome (for --backend chrome)
"""
from __future__ import annotations
import argparse
import os
import platform
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
# Auto-configure library path on macOS ARM (Homebrew) — must be before weasyprint import
if platform.system() == 'Darwin':
_homebrew_lib = '/opt/homebrew/lib'
SCRIPT_DIR = Path(__file__).resolve().parent
THEMES_DIR = SCRIPT_DIR.parent / "themes"
# macOS ARM: auto-configure library path for weasyprint
if platform.system() == "Darwin":
_homebrew_lib = "/opt/homebrew/lib"
if Path(_homebrew_lib).is_dir():
_cur = os.environ.get('DYLD_LIBRARY_PATH', '')
_cur = os.environ.get("DYLD_LIBRARY_PATH", "")
if _homebrew_lib not in _cur:
os.environ['DYLD_LIBRARY_PATH'] = f"{_homebrew_lib}:{_cur}" if _cur else _homebrew_lib
from weasyprint import CSS, HTML
os.environ["DYLD_LIBRARY_PATH"] = (
f"{_homebrew_lib}:{_cur}" if _cur else _homebrew_lib
)
# CSS with Chinese font support
CSS_STYLES = """
@page {
size: A4;
margin: 2.5cm 2cm;
}
def _find_chrome() -> str | None:
"""Find Chrome/Chromium binary path."""
candidates = [
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Chromium.app/Contents/MacOS/Chromium",
shutil.which("google-chrome"),
shutil.which("chromium"),
shutil.which("chrome"),
]
for c in candidates:
if c and Path(c).exists():
return str(c)
return None
body {
font-family: 'Songti SC', 'SimSun', 'STSong', 'Noto Serif CJK SC', serif;
font-size: 12pt;
line-height: 1.8;
color: #000;
width: 100%;
}
h1 {
font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
font-size: 18pt;
font-weight: bold;
text-align: center;
margin-top: 0;
margin-bottom: 1.5em;
}
def _has_weasyprint() -> bool:
"""Check if weasyprint is importable."""
try:
import weasyprint # noqa: F401
h2 {
font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
font-size: 14pt;
font-weight: bold;
margin-top: 1.5em;
margin-bottom: 0.8em;
}
return True
except ImportError:
return False
h3 {
font-family: 'Heiti SC', 'SimHei', 'STHeiti', 'Noto Sans CJK SC', sans-serif;
font-size: 12pt;
font-weight: bold;
margin-top: 1em;
margin-bottom: 0.5em;
}
p {
margin: 0.8em 0;
text-align: justify;
}
def _detect_backend() -> str:
"""Auto-detect best available backend: weasyprint > chrome."""
if _has_weasyprint():
return "weasyprint"
if _find_chrome():
return "chrome"
print(
"Error: No PDF backend found. Install weasyprint (pip install weasyprint) "
"or Google Chrome.",
file=sys.stderr,
)
sys.exit(1)
ul, ol {
margin: 0.8em 0;
padding-left: 2em;
}
li {
margin: 0.4em 0;
}
def _load_theme(theme_name: str) -> str:
"""Load CSS from themes directory."""
theme_file = THEMES_DIR / f"{theme_name}.css"
if not theme_file.exists():
available = [f.stem for f in THEMES_DIR.glob("*.css")]
print(
f"Error: Theme '{theme_name}' not found. Available: {available}",
file=sys.stderr,
)
sys.exit(1)
return theme_file.read_text(encoding="utf-8")
table {
border-collapse: collapse;
width: 100%;
margin: 1em 0;
font-size: 10pt;
table-layout: fixed;
}
th, td {
border: 1px solid #666;
padding: 8px 6px;
text-align: left;
overflow-wrap: break-word;
word-break: normal;
}
th {
background-color: #f0f0f0;
font-weight: bold;
}
hr {
border: none;
border-top: 1px solid #ccc;
margin: 1.5em 0;
}
strong {
font-weight: bold;
}
code {
font-family: 'SF Mono', 'Monaco', 'Menlo', monospace;
font-size: 10pt;
background-color: #f5f5f5;
padding: 0.2em 0.4em;
border-radius: 3px;
}
pre {
background-color: #f5f5f5;
padding: 1em;
overflow-x: auto;
font-size: 10pt;
line-height: 1.4;
border-radius: 4px;
}
blockquote {
border-left: 3px solid #ccc;
margin: 1em 0;
padding-left: 1em;
color: #555;
}
"""
def _list_themes() -> list[str]:
"""List available theme names."""
if not THEMES_DIR.exists():
return []
return sorted(f.stem for f in THEMES_DIR.glob("*.css"))
def _ensure_list_spacing(text: str) -> str:
@@ -152,39 +112,36 @@ def _ensure_list_spacing(text: str) -> str:
Both Python markdown library and pandoc require a blank line before a list
when it follows a paragraph. Without it, list items render as plain text.
This preprocessor adds blank lines before list items when needed, without
modifying the user's original markdown file.
"""
lines = text.split('\n')
lines = text.split("\n")
result = []
list_re = re.compile(r'^(\s*)([-*+]|\d+\.)\s')
list_re = re.compile(r"^(\s*)([-*+]|\d+\.)\s")
for i, line in enumerate(lines):
if i > 0 and list_re.match(line):
prev = lines[i - 1]
if prev.strip() and not list_re.match(prev):
result.append('')
result.append("")
result.append(line)
return '\n'.join(result)
return "\n".join(result)
def _md_to_html(md_file: str) -> str:
"""Convert markdown to HTML using pandoc with list spacing preprocessing.
Reads the markdown file, preprocesses it to ensure proper list spacing,
then passes the content to pandoc via stdin. The original file is not modified.
"""
if not shutil.which('pandoc'):
print("Error: pandoc not found. Install with: brew install pandoc", file=sys.stderr)
"""Convert markdown to HTML using pandoc with list spacing preprocessing."""
if not shutil.which("pandoc"):
print(
"Error: pandoc not found. Install with: brew install pandoc",
file=sys.stderr,
)
sys.exit(1)
# Read and preprocess markdown to ensure list spacing
md_content = Path(md_file).read_text(encoding='utf-8')
md_content = Path(md_file).read_text(encoding="utf-8")
md_content = _ensure_list_spacing(md_content)
result = subprocess.run(
['pandoc', '-f', 'markdown', '-t', 'html'],
input=md_content, capture_output=True, text=True,
["pandoc", "-f", "markdown", "-t", "html"],
input=md_content,
capture_output=True,
text=True,
)
if result.returncode != 0:
print(f"Error: pandoc failed: {result.stderr}", file=sys.stderr)
@@ -193,58 +150,152 @@ def _md_to_html(md_file: str) -> str:
return result.stdout
def markdown_to_pdf(md_file: str, pdf_file: str | None = None) -> str:
"""
Convert markdown file to PDF with Chinese font support.
Args:
md_file: Path to input markdown file
pdf_file: Path to output PDF file (optional, defaults to same name as input)
Returns:
Path to generated PDF file
"""
md_path = Path(md_file)
if pdf_file is None:
pdf_file = str(md_path.with_suffix('.pdf'))
# Convert to HTML via pandoc
html_content = _md_to_html(md_file)
# Create full HTML document
full_html = f"""<!DOCTYPE html>
def _build_full_html(html_content: str, css: str, title: str) -> str:
"""Wrap HTML content in a full document with CSS."""
return f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>{md_path.stem}</title>
<title>{title}</title>
<style>{css}</style>
</head>
<body>
{html_content}
</body>
</html>"""
# Generate PDF
HTML(string=full_html).write_pdf(pdf_file, stylesheets=[CSS(string=CSS_STYLES)])
def _render_weasyprint(full_html: str, pdf_file: str, css: str) -> None:
"""Render PDF using weasyprint."""
from weasyprint import CSS, HTML
HTML(string=full_html).write_pdf(pdf_file, stylesheets=[CSS(string=css)])
def _render_chrome(full_html: str, pdf_file: str) -> None:
"""Render PDF using headless Chrome."""
chrome = _find_chrome()
if not chrome:
print("Error: Chrome not found.", file=sys.stderr)
sys.exit(1)
with tempfile.NamedTemporaryFile(
suffix=".html", mode="w", encoding="utf-8", delete=False
) as f:
f.write(full_html)
html_path = f.name
try:
result = subprocess.run(
[
chrome,
"--headless",
"--disable-gpu",
"--no-pdf-header-footer",
f"--print-to-pdf={pdf_file}",
html_path,
],
capture_output=True,
text=True,
)
if not Path(pdf_file).exists():
print(
f"Error: Chrome failed to generate PDF. stderr: {result.stderr}",
file=sys.stderr,
)
sys.exit(1)
finally:
Path(html_path).unlink(missing_ok=True)
def markdown_to_pdf(
md_file: str,
pdf_file: str | None = None,
theme: str = "default",
backend: str | None = None,
) -> str:
"""
Convert markdown file to PDF.
Args:
md_file: Path to input markdown file
pdf_file: Path to output PDF (optional, defaults to same name as input)
theme: Theme name (from themes/ directory)
backend: 'weasyprint', 'chrome', or None (auto-detect)
Returns:
Path to generated PDF file
"""
md_path = Path(md_file)
if pdf_file is None:
pdf_file = str(md_path.with_suffix(".pdf"))
if backend is None:
backend = _detect_backend()
css = _load_theme(theme)
html_content = _md_to_html(md_file)
full_html = _build_full_html(html_content, css, md_path.stem)
if backend == "weasyprint":
_render_weasyprint(full_html, pdf_file, css)
elif backend == "chrome":
_render_chrome(full_html, pdf_file)
else:
print(f"Error: Unknown backend '{backend}'", file=sys.stderr)
sys.exit(1)
size_kb = Path(pdf_file).stat().st_size / 1024
print(f"Generated: {pdf_file} ({size_kb:.0f}KB, theme={theme}, backend={backend})")
return pdf_file
def main():
if len(sys.argv) < 2:
print("Usage: python md_to_pdf.py <input.md> [output.pdf]")
print("\nConverts markdown to PDF with Chinese font support.")
available_themes = _list_themes()
parser = argparse.ArgumentParser(
description="Markdown to PDF with Chinese font support and themes."
)
parser.add_argument("input", help="Input markdown file")
parser.add_argument("output", nargs="?", help="Output PDF file (optional)")
parser.add_argument(
"--theme",
default="default",
choices=available_themes or ["default"],
help=f"CSS theme (available: {', '.join(available_themes) or 'default'})",
)
parser.add_argument(
"--backend",
choices=["weasyprint", "chrome"],
default=None,
help="PDF rendering backend (default: auto-detect)",
)
parser.add_argument(
"--list-themes",
action="store_true",
help="List available themes and exit",
)
args = parser.parse_args()
if args.list_themes:
for t in available_themes:
marker = " (default)" if t == "default" else ""
css_file = THEMES_DIR / f"{t}.css"
first_line = ""
for line in css_file.read_text().splitlines():
line = line.strip()
if line.startswith("*") and "" in line:
first_line = line.lstrip("* ").strip()
break
print(f" {t}{marker}: {first_line}")
sys.exit(0)
if not Path(args.input).exists():
print(f"Error: File not found: {args.input}", file=sys.stderr)
sys.exit(1)
md_file = sys.argv[1]
pdf_file = sys.argv[2] if len(sys.argv) > 2 else None
if not Path(md_file).exists():
print(f"Error: File not found: {md_file}")
sys.exit(1)
output = markdown_to_pdf(md_file, pdf_file)
print(f"Generated: {output}")
markdown_to_pdf(args.input, args.output, args.theme, args.backend)
if __name__ == "__main__":