fix(pdf-creator): restore list spacing preprocessor for pandoc

- Add _ensure_list_spacing() to handle lists without blank lines before them
- Modify _md_to_html() to preprocess markdown content via stdin
- Add automated test suite (scripts/tests/test_list_rendering.py)
- Fix: Lists without preceding blank lines now render correctly
- Original markdown files remain unmodified (preprocessing in memory only)

Root cause: Pandoc's Markdown reader requires a blank line before a list
(unlike CommonMark, which lets a list interrupt a paragraph). Without
preprocessing, lists that directly follow a paragraph render as plain text.

Tested scenarios:
- Lists with blank lines (normal case)
- Lists without blank lines (critical fix)
- Ordered lists without blank lines
- Original file integrity preserved

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
daymade
2026-03-11 13:58:39 +08:00
parent c49e23e7ef
commit 6dc2805f03
2 changed files with 199 additions and 3 deletions

View File

@@ -0,0 +1,166 @@
#!/usr/bin/env python3
"""
Test list rendering in PDF generation.
Verifies that markdown lists are correctly rendered in PDFs,
even when they don't have blank lines before them.
The original markdown files are NOT modified - preprocessing
happens in memory during conversion.
"""
import subprocess
import sys
import tempfile
from pathlib import Path
# Markdown fixture covering bullet and ordered lists. Scenarios 1 and 4 put a
# blank line between the paragraph and the list (the normal case); scenarios
# 2 and 3 deliberately omit it, which is the case the preprocessor must fix.
# Every heading is preceded by a blank line so that only the paragraph/list
# spacing differs between scenarios.
TEST_MARKDOWN = """# 测试列表解析

## 场景1列表前有空行正常

这是一段文字。

- 列表项 1
- 列表项 2
- 列表项 3

## 场景2列表前没有空行关键测试

这是一段文字。
- 列表项 1
- 列表项 2
- 列表项 3

## 场景3有序列表前没有空行

这是一段文字。
1. 第一项
2. 第二项
3. 第三项

## 场景4有序列表前有空行正常

这是一段文字。

1. 第一项
2. 第二项
3. 第三项
"""
def _section_between(text, start_marker, end_marker=None):
    """Return the slice of *text* from *start_marker* up to *end_marker*.

    Returns "" when *start_marker* does not occur. When *end_marker* is None
    or does not occur after the start, the slice runs to the end of *text*.
    """
    start = text.find(start_marker)
    if start == -1:
        return ""
    if end_marker is None:
        return text[start:]
    end = text.find(end_marker, start + len(start_marker))
    return text[start:] if end == -1 else text[start:end]


def run_test():
    """Render TEST_MARKDOWN to PDF and verify every list scenario.

    Writes the fixture to a temporary ``.md`` file, converts it with
    ``md_to_pdf.py`` (run through ``uv``), extracts the PDF text with
    ``pdftotext``, checks that the markdown file on disk is unchanged, and
    then checks each of the four scenarios inside its own section of the
    extracted text.

    The temporary ``.md``/``.pdf``/``.txt`` files are not deleted; their
    paths are printed when all checks pass.

    Returns:
        bool: True when conversion, extraction, the integrity check and all
        four scenario checks succeed; False otherwise (including when an
        exception is raised inside the conversion/verification steps).
    """
    # delete=False: the file must outlive this handle so the converter
    # subprocess can read it by path.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.md', delete=False, encoding='utf-8'
    ) as md_file:
        md_file.write(TEST_MARKDOWN)
        md_path = md_file.name

    # with_suffix swaps only the final extension; str.replace('.md', ...)
    # would rewrite every ".md" occurring anywhere in the path.
    pdf_path = str(Path(md_path).with_suffix('.pdf'))
    txt_path = str(Path(md_path).with_suffix('.txt'))

    try:
        # Generate PDF
        script_dir = Path(__file__).parent.parent
        md_to_pdf = script_dir / 'md_to_pdf.py'
        print(f"生成 PDF: {md_path} -> {pdf_path}")
        result = subprocess.run(
            ['uv', 'run', '--with', 'weasyprint', str(md_to_pdf), md_path, pdf_path],
            capture_output=True, text=True, cwd=script_dir.parent
        )
        if result.returncode != 0:
            print(f"❌ PDF 生成失败: {result.stderr}")
            return False
        print("✅ PDF 已生成")

        # Extract text from PDF
        result = subprocess.run(
            ['pdftotext', pdf_path, txt_path],
            capture_output=True, text=True
        )
        if result.returncode != 0:
            print(f"❌ 文本提取失败: {result.stderr}")
            return False

        # Read extracted text
        with open(txt_path, 'r', encoding='utf-8') as f:
            pdf_text = f.read()

        # Verify original file was not modified
        with open(md_path, 'r', encoding='utf-8') as f:
            original_content = f.read()
        if original_content != TEST_MARKDOWN:
            print("❌ 原始文件被修改了!")
            return False
        print("✅ 原始文件未被修改")

        # Verify list rendering. Each scenario is checked only within its own
        # section (heading up to the next heading) so that a correctly
        # rendered list in one scenario cannot satisfy another's check.
        print("\n=== 列表渲染验证 ===")
        scene1_section = _section_between(pdf_text, '场景1', '场景2')
        scene2_section = _section_between(pdf_text, '场景2', '场景3')
        scene3_section = _section_between(pdf_text, '场景3', '场景4')
        scene4_section = _section_between(pdf_text, '场景4')

        tests_passed = 0
        tests_total = 4

        # Test 1: List with blank line before it
        if '• 列表项 1' in scene1_section:
            print("✅ 场景1: 列表前有空行 - 正确渲染")
            tests_passed += 1
        else:
            print("❌ 场景1: 列表前有空行 - 渲染失败")

        # Test 2: Critical test - list without blank line before it.
        # A literal "- " means the list was emitted as plain paragraph text.
        if '• 列表项 1' in scene2_section and '- 列表项 1' not in scene2_section:
            print("✅ 场景2: 列表前没有空行 - 正确渲染(关键测试)")
            tests_passed += 1
        else:
            print("❌ 场景2: 列表前没有空行 - 渲染失败")
            print(f" 实际内容: {scene2_section}")

        # Test 3: Ordered list without blank line
        if '1. 第一项' in scene3_section and '2. 第二项' in scene3_section:
            print("✅ 场景3: 有序列表前没有空行 - 正确渲染")
            tests_passed += 1
        else:
            print("❌ 场景3: 有序列表前没有空行 - 渲染失败")

        # Test 4: Ordered list with blank line
        if '1. 第一项' in scene4_section and '2. 第二项' in scene4_section:
            print("✅ 场景4: 有序列表前有空行 - 正确渲染")
            tests_passed += 1
        else:
            print("❌ 场景4: 有序列表前有空行 - 渲染失败")

        print(f"\n=== 测试结果: {tests_passed}/{tests_total} 通过 ===")
        if tests_passed == tests_total:
            print("\n✅ 所有测试通过!")
            print("\n生成的文件:")
            print(f" Markdown: {md_path}")
            print(f" PDF: {pdf_path}")
            print(f" Text: {txt_path}")
            return True
        else:
            print(f"\n{tests_total - tests_passed} 个测试失败")
            return False
    except Exception as e:
        # Top-level boundary of the test script: report any failure (e.g. a
        # missing `uv` or `pdftotext` executable) and signal it via False.
        print(f"❌ 测试失败: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == '__main__':
    # Script entry point: exit status 0 when run_test() reports success,
    # 1 otherwise.
    exit_code = 0 if run_test() else 1
    sys.exit(exit_code)