- Add _ensure_list_spacing() to handle lists without blank lines before them - Modify _md_to_html() to preprocess markdown content via stdin - Add automated test suite (scripts/tests/test_list_rendering.py) - Fix: Lists without preceding blank lines now render correctly - Original markdown files remain unmodified (preprocessing in memory only) Root cause: Pandoc requires blank lines before lists per CommonMark spec. Without preprocessing, lists following paragraphs render as plain text. Tested scenarios: ✅ Lists with blank lines (normal case) ✅ Lists without blank lines (critical fix) ✅ Ordered lists without blank lines ✅ Original file integrity preserved Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
167 lines
4.9 KiB
Python
Executable File
167 lines
4.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
Test list rendering in PDF generation.
|
||
|
||
Verifies that markdown lists are correctly rendered in PDFs,
|
||
even when they don't have blank lines before them.
|
||
|
||
The original markdown files are NOT modified - preprocessing
|
||
happens in memory during conversion.
|
||
"""
|
||
|
||
import subprocess
|
||
import sys
|
||
import tempfile
|
||
from pathlib import Path
|
||
|
||
# Markdown fixture fed to the converter. It holds four scenarios: a bullet
# list and an ordered list, each once WITH and once WITHOUT a blank line
# between the preceding paragraph and the first list item. The missing blank
# lines in scenarios 2 and 3 are deliberate - do not "tidy" them.
TEST_MARKDOWN = """# 测试列表解析

## 场景1:列表前有空行(正常)

这是一段文字。

- 列表项 1
- 列表项 2
- 列表项 3

## 场景2:列表前没有空行(关键测试)

这是一段文字。
- 列表项 1
- 列表项 2
- 列表项 3

## 场景3:有序列表前没有空行

这是一段文字。
1. 第一项
2. 第二项
3. 第三项

## 场景4:有序列表前有空行(正常)

这是一段文字。

1. 第一项
2. 第二项
3. 第三项
"""
|
||
|
||
|
||
def _scene_section(pdf_text, marker, next_marker=None, window=200):
    """Return the part of *pdf_text* that belongs to one scenario.

    The slice starts at the first occurrence of *marker* and is at most
    *window* characters long. When *next_marker* is given and found after
    *marker*, the slice is additionally cut there so that content from the
    following scenario cannot satisfy this scenario's check. Returns an
    empty string when *marker* is not present.
    """
    start = pdf_text.find(marker)
    if start == -1:
        return ""
    end = start + window
    if next_marker is not None:
        next_start = pdf_text.find(next_marker, start + len(marker))
        if next_start != -1:
            end = min(end, next_start)
    return pdf_text[start:end]


def _has_item_line(section, item):
    """Return True when some line of *section* begins with *item*.

    A plain substring test also matches when the list was NOT parsed and its
    items were folded into the preceding paragraph; requiring the item to
    start a line rules that case out.
    """
    return any(line.strip().startswith(item) for line in section.splitlines())


def run_test():
    """Render TEST_MARKDOWN to PDF and check that all four lists were parsed.

    Steps:
      1. Write TEST_MARKDOWN to a temporary ``.md`` file.
      2. Run ``md_to_pdf.py`` (located one directory above this file's
         directory) through ``uv run --with weasyprint``.
      3. Extract the PDF text with the external ``pdftotext`` tool.
      4. Confirm the markdown file still equals TEST_MARKDOWN.
      5. Check each scenario's list inside that scenario's own text section.

    Returns:
        bool: True when the conversion succeeded, the source file is
        unchanged and all four scenario checks pass; False otherwise,
        including when any exception is raised inside the ``try`` block
        (the traceback is printed).

    The temporary ``.md``/``.pdf``/``.txt`` files are not removed by this
    function; their paths are printed on success.
    """
    # delete=False: the file must still exist after the handle is closed so
    # the converter subprocess can read it.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.md', delete=False, encoding='utf-8'
    ) as md_file:
        md_file.write(TEST_MARKDOWN)
        md_path = md_file.name

    # Swap only the final suffix. str.replace('.md', ...) would also rewrite
    # any '.md' occurring earlier in the path (e.g. in the temp directory).
    pdf_path = str(Path(md_path).with_suffix('.pdf'))
    txt_path = str(Path(md_path).with_suffix('.txt'))

    try:
        # Generate PDF
        script_dir = Path(__file__).parent.parent
        md_to_pdf = script_dir / 'md_to_pdf.py'

        print(f"生成 PDF: {md_path} -> {pdf_path}")
        result = subprocess.run(
            ['uv', 'run', '--with', 'weasyprint', str(md_to_pdf), md_path, pdf_path],
            capture_output=True, text=True, cwd=script_dir.parent
        )

        if result.returncode != 0:
            print(f"❌ PDF 生成失败: {result.stderr}")
            return False

        print("✅ PDF 已生成")

        # Extract text from PDF
        result = subprocess.run(
            ['pdftotext', pdf_path, txt_path],
            capture_output=True, text=True
        )

        if result.returncode != 0:
            print(f"❌ 文本提取失败: {result.stderr}")
            return False

        # Read extracted text
        with open(txt_path, 'r', encoding='utf-8') as f:
            pdf_text = f.read()

        # Verify original file was not modified
        with open(md_path, 'r', encoding='utf-8') as f:
            original_content = f.read()

        if original_content != TEST_MARKDOWN:
            print("❌ 原始文件被修改了!")
            return False

        print("✅ 原始文件未被修改")

        # Verify list rendering
        print("\n=== 列表渲染验证 ===")

        tests_passed = 0
        tests_total = 4

        # Test 1: List with blank line before it. Restricted to scenario 1's
        # own section so scenario 2's bullets cannot make it pass.
        scene1_section = _scene_section(pdf_text, '场景1', '场景2')

        if '• 列表项 1' in scene1_section:
            print("✅ 场景1: 列表前有空行 - 正确渲染")
            tests_passed += 1
        else:
            print("❌ 场景1: 列表前有空行 - 渲染失败")

        # Test 2: Critical test - list without blank line before it. A
        # leftover literal '- ' marker means the list was not parsed.
        scene2_section = _scene_section(pdf_text, '场景2', '场景3')

        if '• 列表项 1' in scene2_section and '- 列表项 1' not in scene2_section:
            print("✅ 场景2: 列表前没有空行 - 正确渲染(关键测试)")
            tests_passed += 1
        else:
            print("❌ 场景2: 列表前没有空行 - 渲染失败")
            print(f" 实际内容: {scene2_section}")

        # Test 3: Ordered list without blank line. Items must start their
        # own line; an unparsed list folded into the paragraph would still
        # contain the '1. ...' substring.
        scene3_section = _scene_section(pdf_text, '场景3', '场景4')

        if (_has_item_line(scene3_section, '1. 第一项')
                and _has_item_line(scene3_section, '2. 第二项')):
            print("✅ 场景3: 有序列表前没有空行 - 正确渲染")
            tests_passed += 1
        else:
            print("❌ 场景3: 有序列表前没有空行 - 渲染失败")

        # Test 4: Ordered list with blank line. Restricted to scenario 4's
        # section; searching the whole text would pass on scenario 3 alone.
        scene4_section = _scene_section(pdf_text, '场景4')

        if (_has_item_line(scene4_section, '1. 第一项')
                and _has_item_line(scene4_section, '2. 第二项')):
            print("✅ 场景4: 有序列表前有空行 - 正确渲染")
            tests_passed += 1
        else:
            print("❌ 场景4: 有序列表前有空行 - 渲染失败")

        print(f"\n=== 测试结果: {tests_passed}/{tests_total} 通过 ===")

        if tests_passed == tests_total:
            print("\n✅ 所有测试通过!")
            print("\n生成的文件:")
            print(f" Markdown: {md_path}")
            print(f" PDF: {pdf_path}")
            print(f" Text: {txt_path}")
            return True
        else:
            print(f"\n❌ {tests_total - tests_passed} 个测试失败")
            return False

    except Exception as e:
        # Top-level boundary of the script: report (e.g. a missing external
        # tool) and turn the failure into a False result / exit status 1.
        print(f"❌ 测试失败: {e}")
        import traceback
        traceback.print_exc()
        return False
|
||
|
||
|
||
if __name__ == '__main__':
    # Map the boolean result of run_test() onto the process exit status:
    # 0 when it returned a truthy value, 1 otherwise.
    sys.exit(0 if run_test() else 1)
|