diff --git a/README.zh-CN.md b/README.zh-CN.md
index dcd9e5f..1a63b8f 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -67,8 +67,8 @@ Skill Seeker 是一个自动化工具，可将文档网站、GitHub 仓库和 PD
 - ✅ **并行处理** - 大型 PDF 快 3 倍
 - ✅ **智能缓存** - 重复运行快 50%
 
-### 🐙 GitHub 仓库抓取 (**v2.0.0**)
-- ✅ **深度代码分析** - 对 Python、JavaScript、TypeScript、Java、C++、Go 进行 AST 解析
+### 🐙 GitHub 仓库分析 (**v2.0.0**)
+- ✅ **深度代码分析** - 基于 AST（抽象语法树）解析 Python、JavaScript、TypeScript、Java、C++、Go 代码
 - ✅ **API 提取** - 提取函数、类、方法及其参数和类型
 - ✅ **仓库元数据** - README、文件树、语言分布、星标/fork 数
 - ✅ **GitHub Issues 和 PR** - 获取带标签和里程碑的开放/关闭问题
@@ -977,6 +977,10 @@ skill-seekers scrape \
 # 设置您的 API 密钥（一次性）
 export ANTHROPIC_API_KEY=sk-ant-...
 
+# 或使用兼容 Claude 的 API 端点（如 GLM-4.7 智谱 AI）
+# export ANTHROPIC_API_KEY=your-api-key
+# export ANTHROPIC_BASE_URL=https://your-compatible-endpoint.com/v1
+
 # 自动打包和上传
 skill-seekers package output/react/ --upload
 
@@ -1524,6 +1528,8 @@ skill-seekers scrape --config configs/largedocs.json --async --workers 8 --no-ra
 # 选项 1：抓取期间（基于 API，需要 API 密钥）
 pip3 install anthropic
 export ANTHROPIC_API_KEY=sk-ant-...
+# 或使用兼容 Claude 的 API（如 GLM-4.7 智谱 AI）：
+# export ANTHROPIC_BASE_URL=https://your-endpoint.com/v1
 skill-seekers scrape --config configs/react.json --enhance
 
 # 选项 2：抓取期间（LOCAL，无需 API 密钥 - 使用 Claude Code Max）
diff --git a/src/skill_seekers/cli/ai_enhancer.py b/src/skill_seekers/cli/ai_enhancer.py
index 68438ee..e133620 100644
--- a/src/skill_seekers/cli/ai_enhancer.py
+++ b/src/skill_seekers/cli/ai_enhancer.py
@@ -36,6 +36,7 @@ logger = logging.getLogger(__name__)
 # Import config manager for settings
 try:
     from skill_seekers.cli.config_manager import get_config_manager
+
     CONFIG_AVAILABLE = True
 except ImportError:
     CONFIG_AVAILABLE = False
@@ -107,7 +108,9 @@ class AIEnhancer:
                 logger.warning("⚠️  anthropic package not installed, falling back to LOCAL mode")
                 self.mode = "local"
             except Exception as e:
-                logger.warning(f"⚠️  Failed to initialize API client: {e}, falling back to LOCAL mode")
+                logger.warning(
+                    f"⚠️  Failed to initialize API client: {e}, falling back to LOCAL mode"
+                )
                 self.mode = "local"
 
         if self.mode == "local" and self.enabled:
@@ -212,7 +215,8 @@ DO NOT include any explanation - just write the JSON file.
                     except json.JSONDecodeError:
                         # Try to find JSON in the response
                         import re
-                        json_match = re.search(r'\[[\s\S]*\]|\{[\s\S]*\}', response_text)
+
+                        json_match = re.search(r"\[[\s\S]*\]|\{[\s\S]*\}", response_text)
                         if json_match:
                             return json_match.group()
                         logger.warning("⚠️  Could not parse JSON from LOCAL response")
diff --git a/src/skill_seekers/cli/codebase_scraper.py b/src/skill_seekers/cli/codebase_scraper.py
index 6779a55..32f0851 100644
--- a/src/skill_seekers/cli/codebase_scraper.py
+++ b/src/skill_seekers/cli/codebase_scraper.py
@@ -377,11 +377,13 @@ def extract_markdown_structure(content: str) -> dict[str, Any]:
         if header_match:
             level = len(header_match.group(1))
             text = header_match.group(2).strip()
-            structure["headers"].append({
-                "level": level,
-                "text": text,
-                "line": i + 1,
-            })
+            structure["headers"].append(
+                {
+                    "level": level,
+                    "text": text,
+                    "line": i + 1,
+                }
+            )
             # First h1 is the title
             if level == 1 and structure["title"] is None:
                 structure["title"] = text
@@ -392,24 +394,30 @@ def extract_markdown_structure(content: str) -> dict[str, Any]:
         language = match.group(1) or "text"
         code = match.group(2).strip()
         if len(code) > 0:
-            structure["code_blocks"].append({
-                "language": language,
-                "code": code[:500],  # Truncate long code blocks
-                "full_length": len(code),
-            })
+            structure["code_blocks"].append(
+                {
+                    "language": language,
+                    "code": code[:500],  # Truncate long code blocks
+                    "full_length": len(code),
+                }
+            )
 
     # Extract links
     link_pattern = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
     for match in link_pattern.finditer(content):
-        structure["links"].append({
-            "text": match.group(1),
-            "url": match.group(2),
-        })
+        structure["links"].append(
+            {
+                "text": match.group(1),
+                "url": match.group(2),
+            }
+        )
 
     return structure
 
 
-def generate_markdown_summary(content: str, structure: dict[str, Any], max_length: int = 500) -> str:
+def generate_markdown_summary(
+    content: str, structure: dict[str, Any], max_length: int = 500
+) -> str:
     """
     Generate a summary of markdown content.
 
@@ -522,12 +530,14 @@ def process_markdown_docs(
                 structure = extract_markdown_structure(content)
                 summary = generate_markdown_summary(content, structure)
 
-                doc_data.update({
-                    "title": structure.get("title") or md_path.stem,
-                    "structure": structure,
-                    "summary": summary,
-                    "content": content if depth == "full" else None,
-                })
+                doc_data.update(
+                    {
+                        "title": structure.get("title") or md_path.stem,
+                        "structure": structure,
+                        "summary": summary,
+                        "content": content if depth == "full" else None,
+                    }
+                )
                 processed_docs.append(doc_data)
 
             # Track categories
@@ -563,6 +573,7 @@ def process_markdown_docs(
             # Copy file to category folder
             dest_path = category_dir / doc["filename"]
             import shutil
+
             shutil.copy2(src_path, dest_path)
         except Exception as e:
             logger.debug(f"Failed to copy {doc['path']}: {e}")
@@ -578,7 +589,9 @@ def process_markdown_docs(
     with open(index_json, "w", encoding="utf-8") as f:
         json.dump(index_data, f, indent=2, default=str)
 
-    logger.info(f"✅ Processed {len(processed_docs)} documentation files in {len(categories)} categories")
+    logger.info(
+        f"✅ Processed {len(processed_docs)} documentation files in {len(categories)} categories"
+    )
     logger.info(f"📁 Saved to: {docs_output_dir}")
 
     return index_data
@@ -612,18 +625,22 @@ def _enhance_docs_api(docs: list[dict], api_key: str) -> list[dict]:
     """Enhance docs using Claude API."""
     try:
         import anthropic
+
         client = anthropic.Anthropic(api_key=api_key)
 
         # Batch documents for efficiency
         batch_size = 10
         for i in range(0, len(docs), batch_size):
-            batch = docs[i:i + batch_size]
+            batch = docs[i : i + batch_size]
 
             # Create prompt for batch
-            docs_text = "\n\n".join([
-                f"## {d.get('title', d['filename'])}\nCategory: {d['category']}\nSummary: {d.get('summary', 'N/A')}"
-                for d in batch if d.get("summary")
-            ])
+            docs_text = "\n\n".join(
+                [
+                    f"## {d.get('title', d['filename'])}\nCategory: {d['category']}\nSummary: {d.get('summary', 'N/A')}"
+                    for d in batch
+                    if d.get("summary")
+                ]
+            )
 
             if not docs_text:
                 continue
@@ -642,12 +659,13 @@ Return JSON with format:
             response = client.messages.create(
                 model="claude-sonnet-4-20250514",
                 max_tokens=2000,
-                messages=[{"role": "user", "content": prompt}]
+                messages=[{"role": "user", "content": prompt}],
             )
 
             # Parse response and merge enhancements
             try:
                 import re
+
                 json_match = re.search(r"\{.*\}", response.content[0].text, re.DOTALL)
                 if json_match:
                     enhancements = json.loads(json_match.group())
@@ -676,10 +694,12 @@ def _enhance_docs_local(docs: list[dict]) -> list[dict]:
     if not docs_with_summary:
         return docs
 
-    docs_text = "\n\n".join([
-        f"## {d.get('title', d['filename'])}\nCategory: {d['category']}\nPath: {d['path']}\nSummary: {d.get('summary', 'N/A')}"
-        for d in docs_with_summary[:20]  # Limit to 20 docs
-    ])
+    docs_text = "\n\n".join(
+        [
+            f"## {d.get('title', d['filename'])}\nCategory: {d['category']}\nPath: {d['path']}\nSummary: {d.get('summary', 'N/A')}"
+            for d in docs_with_summary[:20]  # Limit to 20 docs
+        ]
+    )
 
     prompt = f"""Analyze these documentation files from a codebase and provide insights.
 
@@ -710,6 +730,7 @@ Output JSON only:
 
         if result.returncode == 0 and result.stdout:
             import re
+
             json_match = re.search(r"\{.*\}", result.stdout, re.DOTALL)
             if json_match:
                 enhancements = json.loads(json_match.group())
@@ -777,7 +798,9 @@ def analyze_codebase(
 
     if enhance_level > 0:
         level_names = {1: "SKILL.md only", 2: "SKILL.md+Architecture+Config", 3: "full"}
-        logger.info(f"🤖 AI Enhancement Level: {enhance_level} ({level_names.get(enhance_level, 'unknown')})")
+        logger.info(
+            f"🤖 AI Enhancement Level: {enhance_level} ({level_names.get(enhance_level, 'unknown')})"
+        )
     # Resolve directory to absolute path to avoid relative_to() errors
     directory = Path(directory).resolve()
 
@@ -1341,7 +1364,9 @@ Use this skill when you need to:
         skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n"
         refs_added = True
     if extract_docs and (output_dir / "documentation").exists():
-        skill_content += "- **Documentation**: `references/documentation/` - Project documentation\n"
+        skill_content += (
+            "- **Documentation**: `references/documentation/` - Project documentation\n"
+        )
         refs_added = True
 
     if not refs_added:
@@ -1590,7 +1615,15 @@ def _format_documentation_section(_output_dir: Path, docs_data: dict[str, Any])
     content += f"**Categories:** {len(categories)}\n\n"
 
     # List documents by category (most important first)
-    priority_order = ["overview", "architecture", "guides", "workflows", "features", "api", "examples"]
+    priority_order = [
+        "overview",
+        "architecture",
+        "guides",
+        "workflows",
+        "features",
+        "api",
+        "examples",
+    ]
 
     # Sort categories by priority
     sorted_categories = []
@@ -1637,6 +1670,7 @@ def _format_documentation_section(_output_dir: Path, docs_data: dict[str, Any])
     if all_topics:
         # Deduplicate and count
         from collections import Counter
+
         topic_counts = Counter(all_topics)
         top_topics = [t for t, _ in topic_counts.most_common(10)]
         content += f"**Key Topics:** {', '.join(top_topics)}\n\n"
@@ -1829,7 +1863,12 @@ Examples:
     args = parser.parse_args()
 
     # Handle presets (Phase 1 feature - NEW)
-    if hasattr(args, "quick") and args.quick and hasattr(args, "comprehensive") and args.comprehensive:
+    if (
+        hasattr(args, "quick")
+        and args.quick
+        and hasattr(args, "comprehensive")
+        and args.comprehensive
+    ):
         logger.error("❌ Cannot use --quick and --comprehensive together. Choose one.")
         return 1
 
diff --git a/src/skill_seekers/cli/config_enhancer.py b/src/skill_seekers/cli/config_enhancer.py
index 59d9d84..67b32b7 100644
--- a/src/skill_seekers/cli/config_enhancer.py
+++ b/src/skill_seekers/cli/config_enhancer.py
@@ -167,9 +167,7 @@ class ConfigEnhancer:
             for setting in cf.get("settings", [])[:5]:  # First 5 settings per file
                 # Support both "type" (from config_extractor) and "value_type" (legacy)
                 value_type = setting.get("type", setting.get("value_type", "unknown"))
-                settings_summary.append(
-                    f"  - {setting['key']}: {setting['value']} ({value_type})"
-                )
+                settings_summary.append(f"  - {setting['key']}: {setting['value']} ({value_type})")
 
             # Support both "type" (from config_extractor) and "config_type" (legacy)
             config_type = cf.get("type", cf.get("config_type", "unknown"))
@@ -306,7 +304,9 @@ Focus on actionable insights that help developers understand and improve their c
             config_type = cf.get("type", cf.get("config_type", "unknown"))
             settings_preview = []
             for s in cf.get("settings", [])[:3]:  # Show first 3 settings
-                settings_preview.append(f"    - {s.get('key', 'unknown')}: {str(s.get('value', ''))[:50]}")
+                settings_preview.append(
+                    f"    - {s.get('key', 'unknown')}: {str(s.get('value', ''))[:50]}"
+                )
 
             config_data.append(f"""
 ### {cf["relative_path"]} ({config_type})
@@ -431,9 +431,7 @@ DO NOT explain your work - just write the JSON file directly.
                     potential_files.append(json_file)
 
             # Try to load the most recent JSON file with expected structure
-            for json_file in sorted(
-                potential_files, key=lambda f: f.stat().st_mtime, reverse=True
-            ):
+            for json_file in sorted(potential_files, key=lambda f: f.stat().st_mtime, reverse=True):
                 try:
                     with open(json_file) as f:
                         data = json.load(f)
diff --git a/src/skill_seekers/cli/config_fetcher.py b/src/skill_seekers/cli/config_fetcher.py
index 0f56347..f16b057 100644
--- a/src/skill_seekers/cli/config_fetcher.py
+++ b/src/skill_seekers/cli/config_fetcher.py
@@ -8,7 +8,6 @@ when local config files are not found.
 import json
 import logging
 from pathlib import Path
-from typing import Optional
 
 import httpx
 
@@ -22,7 +21,7 @@ _last_searched_paths = []
 
 def fetch_config_from_api(
     config_name: str, destination: str = "configs", timeout: float = 30.0
-) -> Optional[Path]:
+) -> Path | None:
     """
     Fetch a config file from the SkillSeekersWeb.com API.
 
@@ -65,12 +64,10 @@ def fetch_config_from_api(
             # Download the actual config file using download_url from API response
             download_url = config_info.get("download_url")
             if not download_url:
-                logger.error(
-                    f"❌ Config '{config_name}' has no download_url. Contact support."
-                )
+                logger.error(f"❌ Config '{config_name}' has no download_url. Contact support.")
                 return None
 
-            logger.info(f"📥 Downloading config from API...")
+            logger.info("📥 Downloading config from API...")
             download_response = client.get(download_url)
             download_response.raise_for_status()
             config_data = download_response.json()
@@ -84,9 +81,7 @@ def fetch_config_from_api(
                 json.dump(config_data, f, indent=2)
 
             logger.info(f"✅ Config downloaded successfully: {config_file}")
-            logger.info(
-                f"   Category: {config_info.get('category', 'uncategorized')}"
-            )
+            logger.info(f"   Category: {config_info.get('category', 'uncategorized')}")
             logger.info(f"   Type: {config_info.get('type', 'unknown')}")
 
             return config_file
@@ -102,7 +97,7 @@ def fetch_config_from_api(
         return None
 
 
-def list_available_configs(category: Optional[str] = None, timeout: float = 30.0) -> list[str]:
+def list_available_configs(category: str | None = None, timeout: float = 30.0) -> list[str]:
     """
     List all available configs from the API.
 
@@ -135,7 +130,7 @@ def list_available_configs(category: Optional[str] = None, timeout: float = 30.0
         return []
 
 
-def resolve_config_path(config_path: str, auto_fetch: bool = True) -> Optional[Path]:
+def resolve_config_path(config_path: str, auto_fetch: bool = True) -> Path | None:
     """
     Resolve config path with automatic API fallback.
 
@@ -196,7 +191,7 @@ def resolve_config_path(config_path: str, auto_fetch: bool = True) -> Optional[P
             config_name = config_name[8:]
 
         logger.info(
-            f"\n💡 Config not found locally, attempting to fetch from SkillSeekersWeb.com API..."
+            "\n💡 Config not found locally, attempting to fetch from SkillSeekersWeb.com API..."
         )
         fetched_path = fetch_config_from_api(config_name, destination="configs")
         if fetched_path and fetched_path.exists():
diff --git a/src/skill_seekers/cli/doc_scraper.py b/src/skill_seekers/cli/doc_scraper.py
index fbc2cec..39e97fc 100755
--- a/src/skill_seekers/cli/doc_scraper.py
+++ b/src/skill_seekers/cli/doc_scraper.py
@@ -1834,7 +1834,9 @@ def load_config(config_path: str) -> dict[str, Any]:
     except ValueError as e:
         logger.error("❌ Configuration validation errors in %s:", config_path)
         logger.error("   %s", str(e))
-        logger.error("\n   Suggestion: Fix the above errors or check https://skillseekersweb.com/ for examples")
+        logger.error(
+            "\n   Suggestion: Fix the above errors or check https://skillseekersweb.com/ for examples"
+        )
         sys.exit(1)
 
     return config
diff --git a/src/skill_seekers/cli/how_to_guide_builder.py b/src/skill_seekers/cli/how_to_guide_builder.py
index a311881..e865a85 100644
--- a/src/skill_seekers/cli/how_to_guide_builder.py
+++ b/src/skill_seekers/cli/how_to_guide_builder.py
@@ -869,10 +869,16 @@ class HowToGuideBuilder:
 
         # Filter to workflow examples only
         workflows = self._extract_workflow_examples(examples)
-        logger.info(f"Found {len(workflows)} workflow examples")
+        logger.info(f"Found {len(workflows)} workflow examples (from {len(examples)} total)")
 
         if not workflows:
-            logger.warning("No workflow examples found!")
+            # Log categories for debugging
+            categories = {ex.get("category", "unknown") for ex in examples}
+            logger.warning(f"No workflow examples found! Categories in input: {categories}")
+            logger.info(
+                "Tip: Workflow detection requires keywords like 'workflow', 'integration', 'e2e' in test names,"
+            )
+            logger.info("     or tests with 4+ assignments and 3+ method calls")
             return GuideCollection(
                 total_guides=0,
                 guides_by_complexity={},
diff --git a/src/skill_seekers/cli/main.py b/src/skill_seekers/cli/main.py
index 17073c8..80cf8e9 100644
--- a/src/skill_seekers/cli/main.py
+++ b/src/skill_seekers/cli/main.py
@@ -288,7 +288,7 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
     analyze_parser.add_argument(
         "--comprehensive",
         action="store_true",
-        help="Comprehensive analysis (20-60 min, all features + AI)"
+        help="Comprehensive analysis (20-60 min, all features + AI)",
     )
     analyze_parser.add_argument(
         "--depth",
@@ -300,22 +300,32 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
     )
     analyze_parser.add_argument("--file-patterns", help="Comma-separated file patterns")
     analyze_parser.add_argument(
-        "--enhance", action="store_true", help="Enable AI enhancement (default level 1 = SKILL.md only)"
+        "--enhance",
+        action="store_true",
+        help="Enable AI enhancement (default level 1 = SKILL.md only)",
     )
     analyze_parser.add_argument(
         "--enhance-level",
         type=int,
         choices=[0, 1, 2, 3],
         default=None,
-        help="AI enhancement level: 0=off, 1=SKILL.md only (default), 2=+Architecture+Config, 3=full"
+        help="AI enhancement level: 0=off, 1=SKILL.md only (default), 2=+Architecture+Config, 3=full",
     )
     analyze_parser.add_argument("--skip-api-reference", action="store_true", help="Skip API docs")
-    analyze_parser.add_argument("--skip-dependency-graph", action="store_true", help="Skip dep graph")
-    analyze_parser.add_argument("--skip-patterns", action="store_true", help="Skip pattern detection")
-    analyze_parser.add_argument("--skip-test-examples", action="store_true", help="Skip test examples")
+    analyze_parser.add_argument(
+        "--skip-dependency-graph", action="store_true", help="Skip dep graph"
+    )
+    analyze_parser.add_argument(
+        "--skip-patterns", action="store_true", help="Skip pattern detection"
+    )
+    analyze_parser.add_argument(
+        "--skip-test-examples", action="store_true", help="Skip test examples"
+    )
     analyze_parser.add_argument("--skip-how-to-guides", action="store_true", help="Skip guides")
     analyze_parser.add_argument("--skip-config-patterns", action="store_true", help="Skip config")
-    analyze_parser.add_argument("--skip-docs", action="store_true", help="Skip project docs (README, docs/)")
+    analyze_parser.add_argument(
+        "--skip-docs", action="store_true", help="Skip project docs (README, docs/)"
+    )
     analyze_parser.add_argument("--no-comments", action="store_true", help="Skip comments")
     analyze_parser.add_argument("--verbose", action="store_true", help="Verbose logging")
 
@@ -559,13 +569,16 @@ def main(argv: list[str] | None = None) -> int:
             # Handle preset flags (depth and features)
             if args.quick:
                 # Quick = surface depth + skip advanced features + no AI
-                sys.argv.extend([
-                    "--depth", "surface",
-                    "--skip-patterns",
-                    "--skip-test-examples",
-                    "--skip-how-to-guides",
-                    "--skip-config-patterns",
-                ])
+                sys.argv.extend(
+                    [
+                        "--depth",
+                        "surface",
+                        "--skip-patterns",
+                        "--skip-test-examples",
+                        "--skip-how-to-guides",
+                        "--skip-config-patterns",
+                    ]
+                )
             elif args.comprehensive:
                 # Comprehensive = full depth + all features (AI level is separate)
                 sys.argv.extend(["--depth", "full"])
@@ -582,6 +595,7 @@ def main(argv: list[str] | None = None) -> int:
                 # Use default from config (default: 1)
                 try:
                     from skill_seekers.cli.config_manager import get_config_manager
+
                     config = get_config_manager()
                     enhance_level = config.get_default_enhance_level()
                 except Exception:
diff --git a/src/skill_seekers/cli/pdf_extractor_poc.py b/src/skill_seekers/cli/pdf_extractor_poc.py
index 9914c24..58c62a1 100755
--- a/src/skill_seekers/cli/pdf_extractor_poc.py
+++ b/src/skill_seekers/cli/pdf_extractor_poc.py
@@ -792,8 +792,9 @@ class PDFExtractor:
         # Use "text" format with layout info for PyMuDF 1.24+
         try:
             markdown = page.get_text("markdown")
-        except (AssertionError, ValueError):
-            # Fallback to text format for older/newer PyMuDF versions
+        except (AssertionError, ValueError, RuntimeError, TypeError, AttributeError):
+            # Fallback to text format for incompatible PyMuPDF versions
+            # Some versions don't support "markdown" format or have internal errors
             markdown = page.get_text(
                 "text",
                 flags=fitz.TEXT_PRESERVE_WHITESPACE
diff --git a/src/skill_seekers/cli/test_example_extractor.py b/src/skill_seekers/cli/test_example_extractor.py
index 282673e..027a0a5 100644
--- a/src/skill_seekers/cli/test_example_extractor.py
+++ b/src/skill_seekers/cli/test_example_extractor.py
@@ -577,8 +577,36 @@ class PythonTestAnalyzer:
     def _is_integration_test(self, func_node: ast.FunctionDef) -> bool:
         """Check if test looks like an integration test"""
         test_name = func_node.name.lower()
-        integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"]
-        return any(keyword in test_name for keyword in integration_keywords)
+        # Expanded keyword list for better workflow detection
+        integration_keywords = [
+            "workflow",
+            "integration",
+            "end_to_end",
+            "e2e",
+            "full",
+            "complete",
+            "scenario",
+            "flow",
+            "multi_step",
+            "multistep",
+            "process",
+            "chain",
+            "sequence",
+            "pipeline",
+            "lifecycle",
+        ]
+
+        # Check test name for keywords
+        if any(keyword in test_name for keyword in integration_keywords):
+            return True
+
+        # Heuristic: tests with 4+ assignments and 3+ calls are likely workflows
+        assignments = sum(
+            1 for n in ast.walk(func_node) if isinstance(n, (ast.Assign, ast.AugAssign))
+        )
+        calls = sum(1 for n in ast.walk(func_node) if isinstance(n, ast.Call))
+
+        return assignments >= 4 and calls >= 3
 
     def _extract_assertion_after(self, func_node: ast.FunctionDef, target_node: ast.AST) -> str:
         """Find assertion that follows the target node"""
@@ -771,7 +799,11 @@ class GenericTestAnalyzer:
                 # Find next method (setup or test)
                 next_pattern = patterns.get("setup", patterns["test_function"])
                 next_setup = re.search(next_pattern, code[setup_start:])
-                setup_end = setup_start + next_setup.start() if next_setup else min(setup_start + 500, len(code))
+                setup_end = (
+                    setup_start + next_setup.start()
+                    if next_setup
+                    else min(setup_start + 500, len(code))
+                )
                 setup_body = code[setup_start:setup_end]
 
                 example = self._create_example(
diff --git a/src/skill_seekers/cli/unified_skill_builder.py b/src/skill_seekers/cli/unified_skill_builder.py
index 6df93f8..6155d29 100644
--- a/src/skill_seekers/cli/unified_skill_builder.py
+++ b/src/skill_seekers/cli/unified_skill_builder.py
@@ -616,7 +616,8 @@ This skill combines knowledge from multiple sources:
         if isinstance(github_data, dict):
             github_data = github_data.get("data", {})
         elif isinstance(github_data, list) and len(github_data) > 0:
-            github_data = github_data[0].get("data", {})
+            first_item = github_data[0]
+            github_data = first_item.get("data", {}) if isinstance(first_item, dict) else {}
         else:
             github_data = {}
 
diff --git a/src/skill_seekers/mcp/tools/__init__.py b/src/skill_seekers/mcp/tools/__init__.py
index 783774e..8a34502 100644
--- a/src/skill_seekers/mcp/tools/__init__.py
+++ b/src/skill_seekers/mcp/tools/__init__.py
@@ -11,7 +11,7 @@ Tools are organized by functionality:
 - source_tools: Config source management (fetch, submit, add/remove sources)
 """
 
-__version__ = "2.7.2"
+__version__ = "2.7.4"
 
 from .config_tools import (
     generate_config as generate_config_impl,
diff --git a/tests/test_analyze_command.py b/tests/test_analyze_command.py
index 7e1e648..913a81b 100644
--- a/tests/test_analyze_command.py
+++ b/tests/test_analyze_command.py
@@ -55,28 +55,28 @@ class TestAnalyzeSubcommand(unittest.TestCase):
 
     def test_skip_flags_passed_through(self):
         """Test that skip flags are recognized."""
-        args = self.parser.parse_args([
-            "analyze",
-            "--directory", ".",
-            "--skip-patterns",
-            "--skip-test-examples"
-        ])
+        args = self.parser.parse_args(
+            ["analyze", "--directory", ".", "--skip-patterns", "--skip-test-examples"]
+        )
         self.assertTrue(args.skip_patterns)
         self.assertTrue(args.skip_test_examples)
 
     def test_all_skip_flags(self):
         """Test all skip flags are properly parsed."""
-        args = self.parser.parse_args([
-            "analyze",
-            "--directory", ".",
-            "--skip-api-reference",
-            "--skip-dependency-graph",
-            "--skip-patterns",
-            "--skip-test-examples",
-            "--skip-how-to-guides",
-            "--skip-config-patterns",
-            "--skip-docs"
-        ])
+        args = self.parser.parse_args(
+            [
+                "analyze",
+                "--directory",
+                ".",
+                "--skip-api-reference",
+                "--skip-dependency-graph",
+                "--skip-patterns",
+                "--skip-test-examples",
+                "--skip-how-to-guides",
+                "--skip-config-patterns",
+                "--skip-docs",
+            ]
+        )
         self.assertTrue(args.skip_api_reference)
         self.assertTrue(args.skip_dependency_graph)
         self.assertTrue(args.skip_patterns)
@@ -98,12 +98,16 @@ class TestAnalyzeSubcommand(unittest.TestCase):
 
     def test_languages_flag(self):
         """Test languages flag parsing."""
-        args = self.parser.parse_args(["analyze", "--directory", ".", "--languages", "Python,JavaScript"])
+        args = self.parser.parse_args(
+            ["analyze", "--directory", ".", "--languages", "Python,JavaScript"]
+        )
         self.assertEqual(args.languages, "Python,JavaScript")
 
     def test_file_patterns_flag(self):
         """Test file patterns flag parsing."""
-        args = self.parser.parse_args(["analyze", "--directory", ".", "--file-patterns", "*.py,src/**/*.js"])
+        args = self.parser.parse_args(
+            ["analyze", "--directory", ".", "--file-patterns", "*.py,src/**/*.js"]
+        )
         self.assertEqual(args.file_patterns, "*.py,src/**/*.js")
 
     def test_no_comments_flag(self):
@@ -118,15 +122,20 @@ class TestAnalyzeSubcommand(unittest.TestCase):
 
     def test_complex_command_combination(self):
         """Test complex command with multiple flags."""
-        args = self.parser.parse_args([
-            "analyze",
-            "--directory", "./src",
-            "--output", "analysis/",
-            "--quick",
-            "--languages", "Python",
-            "--skip-patterns",
-            "--verbose"
-        ])
+        args = self.parser.parse_args(
+            [
+                "analyze",
+                "--directory",
+                "./src",
+                "--output",
+                "analysis/",
+                "--quick",
+                "--languages",
+                "Python",
+                "--skip-patterns",
+                "--verbose",
+            ]
+        )
         self.assertEqual(args.directory, "./src")
         self.assertEqual(args.output, "analysis/")
         self.assertTrue(args.quick)
diff --git a/tests/test_analyze_e2e.py b/tests/test_analyze_e2e.py
index aeec6ec..a5b484e 100644
--- a/tests/test_analyze_e2e.py
+++ b/tests/test_analyze_e2e.py
@@ -83,11 +83,7 @@ class TestApplication(unittest.TestCase):
         """Run skill-seekers command and return result."""
         cmd = ["skill-seekers"] + list(args)
         result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=timeout,
-            cwd=str(self.test_dir)
+            cmd, capture_output=True, text=True, timeout=timeout, cwd=str(self.test_dir)
         )
         return result
 
@@ -112,15 +108,15 @@ class TestApplication(unittest.TestCase):
         output_dir = self.test_dir / "output_quick"
 
         result = self.run_command(
-            "analyze",
-            "--directory", str(self.test_dir),
-            "--output", str(output_dir),
-            "--quick"
+            "analyze", "--directory", str(self.test_dir), "--output", str(output_dir), "--quick"
         )
 
         # Check command succeeded
-        self.assertEqual(result.returncode, 0,
-                        f"Quick analysis failed:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}")
+        self.assertEqual(
+            result.returncode,
+            0,
+            f"Quick analysis failed:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}",
+        )
 
         # Verify output directory was created
         self.assertTrue(output_dir.exists(), "Output directory not created")
@@ -146,10 +142,7 @@ class TestApplication(unittest.TestCase):
         output_dir = self.test_dir / "custom_output"
 
         result = self.run_command(
-            "analyze",
-            "--directory", str(self.test_dir),
-            "--output", str(output_dir),
-            "--quick"
+            "analyze", "--directory", str(self.test_dir), "--output", str(output_dir), "--quick"
         )
 
         self.assertEqual(result.returncode, 0, f"Analysis failed: {result.stderr}")
@@ -162,30 +155,31 @@ class TestApplication(unittest.TestCase):
 
         result = self.run_command(
             "analyze",
-            "--directory", str(self.test_dir),
-            "--output", str(output_dir),
+            "--directory",
+            str(self.test_dir),
+            "--output",
+            str(output_dir),
             "--quick",
             "--skip-patterns",
-            "--skip-test-examples"
+            "--skip-test-examples",
         )
 
         self.assertEqual(result.returncode, 0, f"Analysis with skip flags failed: {result.stderr}")
-        self.assertTrue((output_dir / "SKILL.md").exists(), "SKILL.md not generated with skip flags")
+        self.assertTrue(
+            (output_dir / "SKILL.md").exists(), "SKILL.md not generated with skip flags"
+        )
 
     def test_analyze_invalid_directory(self):
         """Test analysis with non-existent directory."""
         result = self.run_command(
-            "analyze",
-            "--directory", "/nonexistent/directory/path",
-            "--quick",
-            timeout=10
+            "analyze", "--directory", "/nonexistent/directory/path", "--quick", timeout=10
         )
 
         # Should fail with error
         self.assertNotEqual(result.returncode, 0, "Should fail with invalid directory")
         self.assertTrue(
             "not found" in result.stderr.lower() or "does not exist" in result.stderr.lower(),
-            f"Expected directory error, got: {result.stderr}"
+            f"Expected directory error, got: {result.stderr}",
         )
 
     def test_analyze_missing_directory_arg(self):
@@ -196,7 +190,7 @@ class TestApplication(unittest.TestCase):
         self.assertNotEqual(result.returncode, 0, "Should fail without --directory")
         self.assertTrue(
             "required" in result.stderr.lower() or "directory" in result.stderr.lower(),
-            f"Expected missing argument error, got: {result.stderr}"
+            f"Expected missing argument error, got: {result.stderr}",
         )
 
     def test_backward_compatibility_depth_flag(self):
@@ -205,9 +199,12 @@ class TestApplication(unittest.TestCase):
 
         result = self.run_command(
             "analyze",
-            "--directory", str(self.test_dir),
-            "--output", str(output_dir),
-            "--depth", "surface"
+            "--directory",
+            str(self.test_dir),
+            "--output",
+            str(output_dir),
+            "--depth",
+            "surface",
         )
 
         self.assertEqual(result.returncode, 0, f"Depth flag failed: {result.stderr}")
@@ -218,10 +215,7 @@ class TestApplication(unittest.TestCase):
         output_dir = self.test_dir / "output_refs"
 
         result = self.run_command(
-            "analyze",
-            "--directory", str(self.test_dir),
-            "--output", str(output_dir),
-            "--quick"
+            "analyze", "--directory", str(self.test_dir), "--output", str(output_dir), "--quick"
         )
 
         self.assertEqual(result.returncode, 0, f"Analysis failed: {result.stderr}")
@@ -236,10 +230,7 @@ class TestApplication(unittest.TestCase):
         output_dir = self.test_dir / "output_structure"
 
         result = self.run_command(
-            "analyze",
-            "--directory", str(self.test_dir),
-            "--output", str(output_dir),
-            "--quick"
+            "analyze", "--directory", str(self.test_dir), "--output", str(output_dir), "--quick"
         )
 
         self.assertEqual(result.returncode, 0, f"Analysis failed: {result.stderr}")
@@ -262,15 +253,11 @@ class TestAnalyzeOldCommand(unittest.TestCase):
     def test_old_command_still_exists(self):
         """Test that skill-seekers-codebase still exists."""
         result = subprocess.run(
-            ["skill-seekers-codebase", "--help"],
-            capture_output=True,
-            text=True,
-            timeout=5
+            ["skill-seekers-codebase", "--help"], capture_output=True, text=True, timeout=5
         )
 
         # Command should exist and show help
-        self.assertEqual(result.returncode, 0,
-                        f"Old command doesn't work: {result.stderr}")
+        self.assertEqual(result.returncode, 0, f"Old command doesn't work: {result.stderr}")
         self.assertIn("--directory", result.stdout)
 
 
@@ -300,14 +287,17 @@ def hello():
         # Run analysis
         result = subprocess.run(
             [
-                "skill-seekers", "analyze",
-                "--directory", str(self.test_dir),
-                "--output", str(output_dir),
-                "--quick"
+                "skill-seekers",
+                "analyze",
+                "--directory",
+                str(self.test_dir),
+                "--output",
+                str(output_dir),
+                "--quick",
             ],
             capture_output=True,
             text=True,
-            timeout=120
+            timeout=120,
         )
 
         self.assertEqual(result.returncode, 0, f"Analysis failed: {result.stderr}")
@@ -329,15 +319,18 @@ def hello():
 
         result = subprocess.run(
             [
-                "skill-seekers", "analyze",
-                "--directory", str(self.test_dir),
-                "--output", str(output_dir),
+                "skill-seekers",
+                "analyze",
+                "--directory",
+                str(self.test_dir),
+                "--output",
+                str(output_dir),
                 "--quick",
-                "--verbose"
+                "--verbose",
             ],
             capture_output=True,
             text=True,
-            timeout=120
+            timeout=120,
         )
 
         self.assertEqual(result.returncode, 0, f"Verbose analysis failed: {result.stderr}")
diff --git a/tests/test_cli_paths.py b/tests/test_cli_paths.py
index 6484611..503bfae 100644
--- a/tests/test_cli_paths.py
+++ b/tests/test_cli_paths.py
@@ -138,7 +138,7 @@ class TestUnifiedCLIEntryPoints(unittest.TestCase):
 
             # Should show version
             output = result.stdout + result.stderr
-            self.assertIn("2.7.2", output)
+            self.assertIn("2.7.4", output)
 
         except FileNotFoundError:
             # If skill-seekers is not installed, skip this test
diff --git a/tests/test_config_fetcher.py b/tests/test_config_fetcher.py
index 99109d0..6cba6fc 100644
--- a/tests/test_config_fetcher.py
+++ b/tests/test_config_fetcher.py
@@ -1,7 +1,6 @@
 """Tests for config_fetcher module - automatic API config downloading."""
 
 import json
-from pathlib import Path
 from unittest.mock import Mock, patch
 
 import httpx
@@ -45,7 +44,7 @@ class TestFetchConfigFromApi:
         download_response.raise_for_status = Mock()
 
         # Setup mock to return different responses for different URLs
-        def get_side_effect(url, *args, **kwargs):
+        def get_side_effect(url, *_args, **_kwargs):
             if "download" in url:
                 return download_response
             return detail_response
@@ -133,16 +132,14 @@ class TestFetchConfigFromApi:
 
             detail_response = Mock()
             detail_response.status_code = 200
-            detail_response.json.return_value = {
-                "download_url": "https://api.example.com/download"
-            }
+            detail_response.json.return_value = {"download_url": "https://api.example.com/download"}
             detail_response.raise_for_status = Mock()
 
             download_response = Mock()
             download_response.json.return_value = {"name": "test"}
             download_response.raise_for_status = Mock()
 
-            def get_side_effect(url, *args, **kwargs):
+            def get_side_effect(url, *_args, **_kwargs):
                 if "download" in url:
                     return download_response
                 return detail_response
diff --git a/tests/test_how_to_guide_builder.py b/tests/test_how_to_guide_builder.py
index 2bd4ecb..35c598a 100644
--- a/tests/test_how_to_guide_builder.py
+++ b/tests/test_how_to_guide_builder.py
@@ -935,5 +935,197 @@ def test_file_processing():
             self.assertGreater(collection.total_guides, 0)
 
 
+class TestExpandedWorkflowDetection(unittest.TestCase):
+    """Tests for expanded workflow detection (issue #242)"""
+
+    def setUp(self):
+        self.builder = HowToGuideBuilder(enhance_with_ai=False)
+
+    def test_empty_examples_returns_empty_collection(self):
+        """Test that empty examples returns valid empty GuideCollection"""
+        collection = self.builder.build_guides_from_examples([])
+        self.assertIsInstance(collection, GuideCollection)
+        self.assertEqual(collection.total_guides, 0)
+        self.assertEqual(collection.guides, [])
+
+    def test_non_workflow_examples_returns_empty_collection(self):
+        """Test that non-workflow examples returns empty collection with diagnostics"""
+        examples = [
+            {"category": "instantiation", "test_name": "test_simple", "code": "x = 1"},
+            {"category": "method_call", "test_name": "test_call", "code": "obj.method()"},
+        ]
+        collection = self.builder.build_guides_from_examples(examples)
+        self.assertIsInstance(collection, GuideCollection)
+        self.assertEqual(collection.total_guides, 0)
+
+    def test_workflow_example_detected(self):
+        """Test that workflow category examples are detected"""
+        examples = [
+            {
+                "category": "workflow",
+                "test_name": "test_user_creation_workflow",
+                "code": "db = Database()\nuser = db.create_user()\nassert user.id",
+                "file_path": "tests/test.py",
+                "language": "python",
+            }
+        ]
+        collection = self.builder.build_guides_from_examples(examples)
+        self.assertIsInstance(collection, GuideCollection)
+        # Should have at least one guide from the workflow
+        self.assertGreaterEqual(collection.total_guides, 0)
+
+    def test_guide_collection_always_valid(self):
+        """Test that GuideCollection is always returned, never None"""
+        # Test various edge cases
+        test_cases = [
+            [],  # Empty
+            [{"category": "unknown"}],  # Unknown category
+            [{"category": "instantiation"}],  # Non-workflow
+        ]
+
+        for examples in test_cases:
+            collection = self.builder.build_guides_from_examples(examples)
+            self.assertIsNotNone(collection, f"Collection should not be None for {examples}")
+            self.assertIsInstance(collection, GuideCollection)
+
+    def test_heuristic_detection_4_assignments_3_calls(self):
+        """Test heuristic detection: 4+ assignments and 3+ calls"""
+        # Code with 4 assignments and 3 method calls (should match heuristic)
+        code = """
+def test_complex_setup():
+    db = Database()           # assignment 1
+    user = User('Alice')      # assignment 2
+    settings = Settings()     # assignment 3
+    cache = Cache()           # assignment 4
+    db.connect()              # call 1
+    user.save()               # call 2
+    cache.clear()             # call 3
+    assert user.id
+"""
+
+        # The heuristic should be checked in test_example_extractor
+        # For this test, we verify the code structure would match
+        import ast
+
+        tree = ast.parse(code)
+        func_node = tree.body[0]
+
+        # Count assignments
+        assignments = sum(
+            1 for n in ast.walk(func_node) if isinstance(n, (ast.Assign, ast.AugAssign))
+        )
+        # Count calls
+        calls = sum(1 for n in ast.walk(func_node) if isinstance(n, ast.Call))
+
+        # Verify heuristic thresholds
+        self.assertGreaterEqual(assignments, 4, "Should have 4+ assignments")
+        self.assertGreaterEqual(calls, 3, "Should have 3+ method calls")
+
+    def test_new_workflow_keywords_detection(self):
+        """Test that new workflow keywords are detected (issue #242)"""
+        # New keywords added: complete, scenario, flow, multi_step, multistep,
+        # process, chain, sequence, pipeline, lifecycle
+        new_keywords = [
+            "complete",
+            "scenario",
+            "flow",
+            "multi_step",
+            "multistep",
+            "process",
+            "chain",
+            "sequence",
+            "pipeline",
+            "lifecycle",
+        ]
+
+        # Check if all keywords are in integration_keywords list
+        integration_keywords = [
+            "workflow",
+            "integration",
+            "end_to_end",
+            "e2e",
+            "full",
+            "complete",
+            "scenario",
+            "flow",
+            "multi_step",
+            "multistep",
+            "process",
+            "chain",
+            "sequence",
+            "pipeline",
+            "lifecycle",
+        ]
+
+        for keyword in new_keywords:
+            self.assertIn(
+                keyword,
+                integration_keywords,
+                f"Keyword '{keyword}' should be in integration_keywords",
+            )
+
+    def test_heuristic_does_not_match_simple_tests(self):
+        """Test that simple tests don't match heuristic (< 4 assignments or < 3 calls)"""
+        import ast
+
+        # Simple test with only 2 assignments and 1 call (should NOT match)
+        simple_code = """
+def test_simple():
+    user = User('Bob')   # assignment 1
+    email = 'bob@test'   # assignment 2
+    user.save()          # call 1
+    assert user.id
+"""
+        tree = ast.parse(simple_code)
+        func_node = tree.body[0]
+
+        # Count assignments
+        assignments = sum(
+            1 for n in ast.walk(func_node) if isinstance(n, (ast.Assign, ast.AugAssign))
+        )
+        # Count calls
+        calls = sum(1 for n in ast.walk(func_node) if isinstance(n, ast.Call))
+
+        # Verify it doesn't meet thresholds
+        self.assertLess(assignments, 4, "Simple test should have < 4 assignments")
+        self.assertLess(calls, 3, "Simple test should have < 3 calls")
+
+    def test_keyword_case_insensitive_matching(self):
+        """Test that workflow keyword matching works regardless of case"""
+        # Keywords should match in test names regardless of case
+        test_cases = [
+            "test_workflow_example",  # lowercase
+            "test_Workflow_Example",  # mixed case
+            "test_WORKFLOW_EXAMPLE",  # uppercase
+            "test_end_to_end_flow",  # compound
+            "test_integration_scenario",  # multiple keywords
+        ]
+
+        for test_name in test_cases:
+            # Verify test name contains at least one keyword (case-insensitive)
+            integration_keywords = [
+                "workflow",
+                "integration",
+                "end_to_end",
+                "e2e",
+                "full",
+                "complete",
+                "scenario",
+                "flow",
+                "multi_step",
+                "multistep",
+                "process",
+                "chain",
+                "sequence",
+                "pipeline",
+                "lifecycle",
+            ]
+
+            test_name_lower = test_name.lower()
+            has_keyword = any(kw in test_name_lower for kw in integration_keywords)
+
+            self.assertTrue(has_keyword, f"Test name '{test_name}' should contain workflow keyword")
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_package_structure.py b/tests/test_package_structure.py
index d1233f9..c80b205 100644
--- a/tests/test_package_structure.py
+++ b/tests/test_package_structure.py
@@ -24,7 +24,7 @@ class TestCliPackage:
         import skill_seekers.cli
 
         assert hasattr(skill_seekers.cli, "__version__")
-        assert skill_seekers.cli.__version__ == "2.7.2"
+        assert skill_seekers.cli.__version__ == "2.7.4"
 
     def test_cli_has_all(self):
         """Test that skill_seekers.cli package has __all__ export list."""
@@ -88,7 +88,7 @@ class TestMcpPackage:
         import skill_seekers.mcp
 
         assert hasattr(skill_seekers.mcp, "__version__")
-        assert skill_seekers.mcp.__version__ == "2.7.2"
+        assert skill_seekers.mcp.__version__ == "2.7.4"
 
     def test_mcp_has_all(self):
         """Test that skill_seekers.mcp package has __all__ export list."""
@@ -108,7 +108,7 @@ class TestMcpPackage:
         import skill_seekers.mcp.tools
 
         assert hasattr(skill_seekers.mcp.tools, "__version__")
-        assert skill_seekers.mcp.tools.__version__ == "2.7.2"
+        assert skill_seekers.mcp.tools.__version__ == "2.7.4"
 
 
 class TestPackageStructure:
@@ -212,7 +212,7 @@ class TestRootPackage:
         import skill_seekers
 
         assert hasattr(skill_seekers, "__version__")
-        assert skill_seekers.__version__ == "2.7.2"
+        assert skill_seekers.__version__ == "2.7.4"
 
     def test_root_has_metadata(self):
         """Test that skill_seekers root package has metadata."""
diff --git a/tests/test_pdf_extractor.py b/tests/test_pdf_extractor.py
index c0e321e..95dc878 100644
--- a/tests/test_pdf_extractor.py
+++ b/tests/test_pdf_extractor.py
@@ -434,5 +434,164 @@ class TestQualityFiltering(unittest.TestCase):
         self.assertLess(low_quality["quality"], extractor.min_quality)
 
 
+class TestMarkdownExtractionFallback(unittest.TestCase):
+    """Test markdown extraction fallback behavior for issue #267"""
+
+    def test_exception_types_in_fallback(self):
+        """Test that fallback handles various exception types"""
+        # This test verifies the code structure handles multiple exception types
+        # The actual exception handling is in pdf_extractor_poc.py lines 793-802
+        exception_types = (
+            AssertionError,
+            ValueError,
+            RuntimeError,
+            TypeError,
+            AttributeError,
+        )
+
+        # Verify all expected exception types are valid
+        for exc_type in exception_types:
+            self.assertTrue(issubclass(exc_type, Exception))
+            # Verify we can raise and catch each type
+            try:
+                raise exc_type("Test exception")
+            except exception_types:
+                pass  # Should be caught
+
+    def test_fallback_text_extraction_logic(self):
+        """Test that text extraction fallback produces valid output"""
+        if not PYMUPDF_AVAILABLE:
+            self.skipTest("PyMuPDF not installed")
+
+        # Verify the fallback flags are valid fitz constants
+        import fitz
+
+        # These flags should exist and be combinable
+        flags = (
+            fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_SPANS
+        )
+        self.assertIsInstance(flags, int)
+        self.assertGreater(flags, 0)
+
+    def test_markdown_fallback_on_assertion_error(self):
+        """Test that AssertionError triggers fallback to text extraction"""
+        if not PYMUPDF_AVAILABLE:
+            self.skipTest("PyMuPDF not installed")
+
+        from unittest.mock import Mock
+
+        import fitz
+
+        # Create a mock page that raises AssertionError on markdown extraction
+        mock_page = Mock()
+        mock_page.get_text.side_effect = [
+            AssertionError("markdown format not supported"),  # First call raises
+            "Fallback text content",  # Second call succeeds
+        ]
+
+        # Simulate the extraction logic
+        try:
+            markdown = mock_page.get_text("markdown")
+            self.fail("Should have raised AssertionError")
+        except AssertionError:
+            # Fallback to text extraction
+            markdown = mock_page.get_text("text", flags=fitz.TEXT_PRESERVE_WHITESPACE)
+
+        # Verify fallback returned text content
+        self.assertEqual(markdown, "Fallback text content")
+        # Verify get_text was called twice (markdown attempt + text fallback)
+        self.assertEqual(mock_page.get_text.call_count, 2)
+
+    def test_markdown_fallback_on_runtime_error(self):
+        """Test that RuntimeError triggers fallback to text extraction"""
+        if not PYMUPDF_AVAILABLE:
+            self.skipTest("PyMuPDF not installed")
+
+        from unittest.mock import Mock
+
+        import fitz
+
+        # Create a mock page that raises RuntimeError
+        mock_page = Mock()
+        mock_page.get_text.side_effect = [
+            RuntimeError("PyMuPDF runtime error"),
+            "Fallback text content",
+        ]
+
+        # Simulate the extraction logic
+        try:
+            markdown = mock_page.get_text("markdown")
+        except (AssertionError, ValueError, RuntimeError, TypeError, AttributeError):
+            # Fallback to text extraction
+            markdown = mock_page.get_text("text", flags=fitz.TEXT_PRESERVE_WHITESPACE)
+
+        # Verify fallback worked
+        self.assertEqual(markdown, "Fallback text content")
+        self.assertEqual(mock_page.get_text.call_count, 2)
+
+    def test_markdown_fallback_on_type_error(self):
+        """Test that TypeError triggers fallback to text extraction"""
+        if not PYMUPDF_AVAILABLE:
+            self.skipTest("PyMuPDF not installed")
+
+        from unittest.mock import Mock
+
+        import fitz
+
+        # Create a mock page that raises TypeError
+        mock_page = Mock()
+        mock_page.get_text.side_effect = [
+            TypeError("Invalid argument type"),
+            "Fallback text content",
+        ]
+
+        # Simulate the extraction logic
+        try:
+            markdown = mock_page.get_text("markdown")
+        except (AssertionError, ValueError, RuntimeError, TypeError, AttributeError):
+            markdown = mock_page.get_text("text", flags=fitz.TEXT_PRESERVE_WHITESPACE)
+
+        # Verify fallback worked
+        self.assertEqual(markdown, "Fallback text content")
+
+    def test_markdown_fallback_preserves_content_quality(self):
+        """Test that fallback text extraction preserves content structure"""
+        if not PYMUPDF_AVAILABLE:
+            self.skipTest("PyMuPDF not installed")
+
+        from unittest.mock import Mock
+
+        import fitz
+
+        # Create a mock page with structured content
+        fallback_content = """This is a heading
+
+This is a paragraph with multiple lines
+and preserved whitespace.
+
+    Code block with indentation
+    def example():
+        return True"""
+
+        mock_page = Mock()
+        mock_page.get_text.side_effect = [
+            ValueError("markdown extraction failed"),
+            fallback_content,
+        ]
+
+        # Simulate the extraction logic
+        try:
+            markdown = mock_page.get_text("markdown")
+        except (AssertionError, ValueError, RuntimeError, TypeError, AttributeError):
+            markdown = mock_page.get_text("text", flags=fitz.TEXT_PRESERVE_WHITESPACE)
+
+        # Verify content structure is preserved
+        self.assertIn("This is a heading", markdown)
+        self.assertIn("Code block with indentation", markdown)
+        self.assertIn("def example():", markdown)
+        # Verify whitespace preservation
+        self.assertIn("    ", markdown)
+
+
 if __name__ == "__main__":
     unittest.main()