fix(pdf-creator): restore list spacing preprocessor for pandoc

- Add _ensure_list_spacing() to handle lists without blank lines before them - Modify _md_to_html() to preprocess markdown content via stdin - Add automated test suite (scripts/tests/test_list_rendering.py) - Fix: Lists without preceding blank lines now render correctly - Original markdown files remain unmodified (preprocessing in memory only) Root cause: Pandoc's default Markdown reader requires a blank line before a list (CommonMark itself lets a list interrupt a paragraph; Pandoc does so only with the lists_without_preceding_blankline extension). Without preprocessing, lists following paragraphs render as plain text. Tested scenarios: ✅ Lists with blank lines (normal case) ✅ Lists without blank lines (critical fix) ✅ Ordered lists without blank lines ✅ Original file integrity preserved Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-11 13:58:39 +08:00
parent c49e23e7ef
commit 6dc2805f03
2 changed files with 199 additions and 3 deletions
--- a/pdf-creator/scripts/tests/test_list_rendering.py
+++ b/pdf-creator/scripts/tests/test_list_rendering.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+"""
+Test list rendering in PDF generation.
+
+Verifies that markdown lists are correctly rendered in PDFs,
+even when they don't have blank lines before them.
+
+The original markdown files are NOT modified - preprocessing
+happens in memory during conversion.
+"""
+
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+# Markdown fixture: bullet and ordered lists, each with and without a preceding blank line
+TEST_MARKDOWN = """# 测试列表解析
+
+## 场景1：列表前有空行（正常）
+
+这是一段文字。
+
+- 列表项 1
+- 列表项 2
+- 列表项 3
+
+## 场景2：列表前没有空行（关键测试）
+
+这是一段文字。
+- 列表项 1
+- 列表项 2
+- 列表项 3
+
+## 场景3：有序列表前没有空行
+
+这是一段文字。
+1. 第一项
+2. 第二项
+3. 第三项
+
+## 场景4：有序列表前有空行（正常）
+
+这是一段文字。
+
+1. 第一项
+2. 第二项
+3. 第三项
+"""
+
+
+def run_test():
+    """Render TEST_MARKDOWN to PDF and check each list scenario in its text.
+
+    Each scenario is checked against its own section of the `pdftotext` output;
+    temp files are kept on disk. Returns True only if every step succeeds.
+    """
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as md_file:
+        md_file.write(TEST_MARKDOWN)
+        md_path = md_file.name
+
+    # with_suffix() swaps only the extension; str.replace() hits any '.md' in the path.
+    pdf_path = str(Path(md_path).with_suffix('.pdf'))
+    txt_path = str(Path(md_path).with_suffix('.txt'))
+
+    def scene(text, number):
+        """Return text from heading 场景<number> up to the next scenario heading."""
+        start = text.find(f'场景{number}')
+        if start == -1:
+            return ""
+        end = text.find(f'场景{number + 1}', start)
+        return text[start:] if end == -1 else text[start:end]
+
+    def ordered_ok(section):
+        """True if items 1 and 2 are both present and never share a line."""
+        first, second = '1. 第一项', '2. 第二项'
+        # Raw markdown holds these substrings too, so presence alone cannot fail.
+        # Assumed: an unrendered list is extracted as one paragraph line.
+        merged = any(first in ln and second in ln for ln in section.splitlines())
+        return first in section and second in section and not merged
+
+    try:
+        script_dir = Path(__file__).parent.parent
+        md_to_pdf = script_dir / 'md_to_pdf.py'
+        print(f"生成 PDF: {md_path} -> {pdf_path}")
+        result = subprocess.run(
+            ['uv', 'run', '--with', 'weasyprint', str(md_to_pdf), md_path, pdf_path],
+            capture_output=True, text=True, cwd=script_dir.parent
+        )
+        if result.returncode != 0:
+            print(f"❌ PDF 生成失败: {result.stderr}")
+            return False
+        print("✅ PDF 已生成")
+
+        # Extract text from PDF
+        result = subprocess.run(
+            ['pdftotext', pdf_path, txt_path],
+            capture_output=True, text=True
+        )
+        if result.returncode != 0:
+            print(f"❌ 文本提取失败: {result.stderr}")
+            return False
+        with open(txt_path, 'r', encoding='utf-8') as f:
+            pdf_text = f.read()
+
+        # Verify original file was not modified
+        with open(md_path, 'r', encoding='utf-8') as f:
+            original_content = f.read()
+        if original_content != TEST_MARKDOWN:
+            print("❌ 原始文件被修改了！")
+            return False
+        print("✅ 原始文件未被修改")
+
+        print("\n=== 列表渲染验证 ===")
+        tests_passed, tests_total = 0, 4
+
+        # Test 1: scoped to its own section so scenario 2 cannot satisfy it
+        if '• 列表项 1' in scene(pdf_text, 1):
+            print("✅ 场景1: 列表前有空行 - 正确渲染")
+            tests_passed += 1
+        else:
+            print("❌ 场景1: 列表前有空行 - 渲染失败")
+
+        # Test 2: Critical test - list without blank line before it
+        scene2_section = scene(pdf_text, 2)
+        if '• 列表项 1' in scene2_section and '- 列表项 1' not in scene2_section:
+            print("✅ 场景2: 列表前没有空行 - 正确渲染（关键测试）")
+            tests_passed += 1
+        else:
+            print("❌ 场景2: 列表前没有空行 - 渲染失败")
+            print(f"   实际内容: {scene2_section}")
+
+        # Test 3: Ordered list without blank line
+        if ordered_ok(scene(pdf_text, 3)):
+            print("✅ 场景3: 有序列表前没有空行 - 正确渲染")
+            tests_passed += 1
+        else:
+            print("❌ 场景3: 有序列表前没有空行 - 渲染失败")
+
+        # Test 4: Ordered list with blank line
+        if ordered_ok(scene(pdf_text, 4)):
+            print("✅ 场景4: 有序列表前有空行 - 正确渲染")
+            tests_passed += 1
+        else:
+            print("❌ 场景4: 有序列表前有空行 - 渲染失败")
+
+        print(f"\n=== 测试结果: {tests_passed}/{tests_total} 通过 ===")
+        if tests_passed != tests_total:
+            print(f"\n❌ {tests_total - tests_passed} 个测试失败")
+            return False
+        print("\n✅ 所有测试通过！")
+        print("\n生成的文件:")
+        print(f"  Markdown: {md_path}")
+        print(f"  PDF:      {pdf_path}")
+        print(f"  Text:     {txt_path}")
+        return True
+    except Exception as e:
+        print(f"❌ 测试失败: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+if __name__ == '__main__':
+    # Exit status mirrors the outcome: 0 when run_test() passes, 1 otherwise.
+    sys.exit(0 if run_test() else 1)