From 0878ad3ef68808cb7def165adadc4dd8780589c6 Mon Sep 17 00:00:00 2001
From: yusyus <yusufkaraaslan.yk@pm.me>
Date: Wed, 18 Feb 2026 22:44:41 +0300
Subject: [PATCH] fix: resolve all ruff linting errors (W293, F401, B904,
 UP007, UP045, E741, SIM102, SIM117, F821, ARG)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

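Auto-fixed (whitespace, imports, type annotations, simplifications):
- codebase_scraper.py: W293 blank lines with whitespace
- doc_scraper.py: W293 blank lines with whitespace
- parsers/extractors/__init__.py: W293
- parsers/extractors/base_parser.py: W293, UP007, UP045, F401
- parsers/extractors/formatters.py: W293, F401, SIM108 if/else → ternary
- parsers/extractors/markdown_parser.py: W293, F401, UP045, SIM110 loop → `any()`, F841 unused local
- remaining touched files (see diffstat below): same classes of auto-fix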

Manual fixes:
- enhancement_workflow.py: B904 `raise` inside `except` without a cause → `raise ... from exc`; F401 unused `os` import removed
- parsers/extractors/quality_scorer.py: E741 ambiguous var `l` → `line`
- parsers/extractors/rst_parser.py: SIM102 nested if → combined conditions (x2)
- pdf_scraper.py: F821 undefined `logger` → `print()` (consistent with file style)
- mcp/tools/workflow_tools.py: ARG001 unused `args` → `_args`
- tests/test_workflow_runner.py: ARG005 unused lambda args → `_a`/`_kw`, ARG001 `kwargs` → `_kwargs`
- tests/test_workflows_command.py: SIM117 nested with → combined with (x2)

All 1922 tests pass.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 src/skill_seekers/cli/codebase_scraper.py     |  10 +-
 src/skill_seekers/cli/doc_scraper.py          |   8 +-
 src/skill_seekers/cli/enhancement_workflow.py |   5 +-
 .../cli/parsers/extractors/__init__.py        |  10 +-
 .../cli/parsers/extractors/base_parser.py     | 132 +++---
 .../cli/parsers/extractors/formatters.py      | 114 +++---
 .../cli/parsers/extractors/markdown_parser.py | 282 +++++++------
 .../cli/parsers/extractors/pdf_parser.py      |  27 +-
 .../cli/parsers/extractors/quality_scorer.py  | 112 +++---
 .../cli/parsers/extractors/rst_parser.py      | 379 +++++++++---------
 .../parsers/extractors/unified_structure.py   | 128 +++---
 src/skill_seekers/cli/pdf_scraper.py          |  14 +-
 src/skill_seekers/cli/unified_enhancer.py     |   7 +-
 src/skill_seekers/cli/unified_scraper.py      |   2 +-
 src/skill_seekers/mcp/tools/workflow_tools.py |   2 +-
 tests/test_create_integration_basic.py        |  44 +-
 tests/test_unified_parsers.py                 |   4 +-
 tests/test_workflow_runner.py                 |  17 +-
 tests/test_workflow_tools_mcp.py              |   3 +-
 tests/test_workflows_command.py               |  52 +--
 20 files changed, 657 insertions(+), 695 deletions(-)
diff --git a/src/skill_seekers/cli/codebase_scraper.py b/src/skill_seekers/cli/codebase_scraper.py
index 0643cfe..36f2e7c 100644
--- a/src/skill_seekers/cli/codebase_scraper.py
+++ b/src/skill_seekers/cli/codebase_scraper.py
@@ -444,7 +444,7 @@ def extract_markdown_structure(content: str) -> dict[str, Any]:
 def extract_rst_structure(content: str) -> dict[str, Any]:
     """
     Extract structure from ReStructuredText (RST) content.
-    
+
     Uses the enhanced unified RST parser for comprehensive extraction.
 
     RST uses underline-style headers:
@@ -474,13 +474,13 @@ def extract_rst_structure(content: str) -> dict[str, Any]:
     # Use the enhanced unified RST parser
     try:
         from skill_seekers.cli.parsers.extractors import RstParser
-        
+
         parser = RstParser()
         result = parser.parse_string(content, "<string>")
-        
+
         if result.success and result.document:
             doc = result.document
-            
+
             # Convert to legacy structure format for backward compatibility
             structure = {
                 "title": doc.title,
@@ -531,7 +531,7 @@ def extract_rst_structure(content: str) -> dict[str, Any]:
     except Exception as e:
         # Fall back to basic extraction if unified parser fails
         logger.warning(f"Enhanced RST parser failed: {e}, using basic parser")
-    
+
     # Legacy basic extraction (fallback)
     import re
 
diff --git a/src/skill_seekers/cli/doc_scraper.py b/src/skill_seekers/cli/doc_scraper.py
index b50adea..83caace 100755
--- a/src/skill_seekers/cli/doc_scraper.py
+++ b/src/skill_seekers/cli/doc_scraper.py
@@ -401,13 +401,13 @@ class DocToSkillConverter:
         # Try enhanced unified parser first
         try:
             from skill_seekers.cli.parsers.extractors import MarkdownParser
-            
+
             parser = MarkdownParser()
             result = parser.parse_string(content, url)
-            
+
             if result.success and result.document:
                 doc = result.document
-                
+
                 # Extract links from the document
                 links = []
                 for link in doc.external_links:
@@ -421,7 +421,7 @@ class DocToSkillConverter:
                     full_url = full_url.split("#")[0]
                     if ".md" in full_url and self.is_valid_url(full_url) and full_url not in links:
                         links.append(full_url)
-                
+
                 return {
                     "url": url,
                     "title": doc.title or "",
diff --git a/src/skill_seekers/cli/enhancement_workflow.py b/src/skill_seekers/cli/enhancement_workflow.py
index b535217..85886e3 100644
--- a/src/skill_seekers/cli/enhancement_workflow.py
+++ b/src/skill_seekers/cli/enhancement_workflow.py
@@ -24,7 +24,6 @@ Usage:
 
 import json
 import logging
-import os
 from dataclasses import dataclass, field
 from datetime import datetime
 from importlib.resources import files as importlib_files
@@ -145,11 +144,11 @@ class WorkflowEngine:
                 pkg_ref = importlib_files("skill_seekers.workflows").joinpath(bare_name)
                 yaml_text = pkg_ref.read_text(encoding="utf-8")
                 logger.info(f"📋 Loading bundled workflow: {bare_name}")
-            except (FileNotFoundError, TypeError, ModuleNotFoundError):
+            except (FileNotFoundError, TypeError, ModuleNotFoundError) as exc:
                 raise FileNotFoundError(
                     f"Workflow '{yaml_ref.stem}' not found. "
                     "Use 'skill-seekers workflows list' to see available workflows."
-                )
+                ) from exc
 
         if resolved_path is not None:
             logger.info(f"📋 Loading workflow: {resolved_path}")
diff --git a/src/skill_seekers/cli/parsers/extractors/__init__.py b/src/skill_seekers/cli/parsers/extractors/__init__.py
index 575d5a3..12a01df 100644
--- a/src/skill_seekers/cli/parsers/extractors/__init__.py
+++ b/src/skill_seekers/cli/parsers/extractors/__init__.py
@@ -6,20 +6,20 @@ a standardized Document structure.
 
 Usage:
     from skill_seekers.cli.parsers.extractors import RstParser, MarkdownParser
-    
+
     # Parse RST file
     parser = RstParser()
     result = parser.parse_file("docs/class_node.rst")
-    
+
     if result.success:
         doc = result.document
         print(f"Title: {doc.title}")
         print(f"Tables: {len(doc.tables)}")
         print(f"Code blocks: {len(doc.code_blocks)}")
-        
+
         # Convert to markdown
         markdown = doc.to_markdown()
-        
+
         # Convert to skill format
         skill_data = doc.to_skill_format()
 
@@ -29,7 +29,7 @@ Available Parsers:
 
 Auto-Detection:
     from skill_seekers.cli.parsers.extractors import parse_document
-    
+
     # Automatically detects format
     result = parse_document("file.rst")
 """
diff --git a/src/skill_seekers/cli/parsers/extractors/base_parser.py b/src/skill_seekers/cli/parsers/extractors/base_parser.py
index 7eb1237..362aa8d 100644
--- a/src/skill_seekers/cli/parsers/extractors/base_parser.py
+++ b/src/skill_seekers/cli/parsers/extractors/base_parser.py
@@ -8,11 +8,11 @@ and implement the same interface for consistent usage.
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Optional, Union
+from typing import Any
 import time
 import logging
 
-from .unified_structure import Document, ExtractionStats
+from .unified_structure import Document
 
 logger = logging.getLogger(__name__)
 
@@ -20,11 +20,11 @@ logger = logging.getLogger(__name__)
 @dataclass
 class ParseResult:
     """Result of parsing a document."""
-    document: Optional[Document] = None
+    document: Document | None = None
     success: bool = False
     errors: list[str] = field(default_factory=list)
     warnings: list[str] = field(default_factory=list)
-    
+
     @property
     def is_ok(self) -> bool:
         """Check if parsing succeeded."""
@@ -34,18 +34,18 @@ class ParseResult:
 class BaseParser(ABC):
     """
     Abstract base class for all document parsers.
-    
+
     Implementations:
     - RstParser: ReStructuredText documents
     - MarkdownParser: Markdown documents
     - PdfParser: PDF documents
     - HtmlParser: HTML documents (future)
     """
-    
-    def __init__(self, options: Optional[dict[str, Any]] = None):
+
+    def __init__(self, options: dict[str, Any] | None = None):
         """
         Initialize parser with options.
-        
+
         Args:
             options: Parser-specific options
                 Common options:
@@ -61,26 +61,26 @@ class BaseParser(ABC):
         self._quality_scoring = self.options.get('quality_scoring', True)
         self._max_file_size = self.options.get('max_file_size_mb', 50.0) * 1024 * 1024
         self._encoding = self.options.get('encoding', 'utf-8')
-    
+
     @property
     @abstractmethod
     def format_name(self) -> str:
         """Return the format name this parser handles."""
         pass
-    
+
     @property
     @abstractmethod
     def supported_extensions(self) -> list[str]:
         """Return list of supported file extensions."""
         pass
-    
-    def can_parse(self, source: Union[str, Path]) -> bool:
+
+    def can_parse(self, source: str | Path) -> bool:
         """
         Check if this parser can handle the given source.
-        
+
         Args:
             source: File path or content string
-            
+
         Returns:
             True if this parser can handle the source
         """
@@ -95,58 +95,58 @@ class BaseParser(ABC):
             except Exception:
                 return False
         return False
-    
-    def parse(self, source: Union[str, Path]) -> ParseResult:
+
+    def parse(self, source: str | Path) -> ParseResult:
         """
         Parse a document from file path or content string.
-        
+
         Args:
             source: File path (str/Path) or content string
-            
+
         Returns:
             ParseResult with document or error info
         """
         start_time = time.time()
         result = ParseResult()
-        
+
         try:
             # Read source
             content, source_path = self._read_source_with_path(source)
-            
+
             # Check file size
             if len(content.encode(self._encoding)) > self._max_file_size:
                 result.errors.append(f"File too large: {source_path}")
                 return result
-            
+
             # Validate format
             if not self._detect_format(content):
                 result.warnings.append(f"Content may not be valid {self.format_name}")
-            
+
             # Parse content
             document = self._parse_content(content, source_path)
-            
+
             # Post-process
             document = self._post_process(document)
-            
+
             # Record stats
             processing_time = (time.time() - start_time) * 1000
             if document.stats:
                 document.stats.processing_time_ms = processing_time
-            
+
             result.document = document
             result.success = True
             result.warnings.extend(document.stats.warnings)
-            
+
         except Exception as e:
             result.errors.append(f"Parse error: {str(e)}")
             logger.exception(f"Error parsing {source}")
-        
+
         return result
-    
-    def parse_file(self, path: Union[str, Path]) -> ParseResult:
+
+    def parse_file(self, path: str | Path) -> ParseResult:
         """Parse a file from path."""
         return self.parse(path)
-    
+
     def parse_string(self, content: str, source_path: str = "<string>") -> ParseResult:
         """Parse content from string."""
         # Create a wrapper that looks like a path
@@ -160,46 +160,46 @@ class BaseParser(ABC):
                 return True
             def __str__(self):
                 return self._path
-        
+
         source = StringSource(content, source_path)
         result = self.parse(source)
         if result.document:
             result.document.source_path = source_path
         return result
-    
+
     @abstractmethod
     def _parse_content(self, content: str, source_path: str) -> Document:
         """
         Parse content string into Document.
-        
+
         Args:
             content: Raw content to parse
             source_path: Original source path (for reference)
-            
+
         Returns:
             Parsed Document
         """
         pass
-    
+
     @abstractmethod
     def _detect_format(self, content: str) -> bool:
         """
         Detect if content matches this parser's format.
-        
+
         Args:
             content: Content to check
-            
+
         Returns:
             True if content appears to be this format
         """
         pass
-    
-    def _read_source(self, source: Union[str, Path]) -> str:
+
+    def _read_source(self, source: str | Path) -> str:
         """Read content from source."""
         content, _ = self._read_source_with_path(source)
         return content
-    
-    def _read_source_with_path(self, source: Union[str, Path]) -> tuple[str, str]:
+
+    def _read_source_with_path(self, source: str | Path) -> tuple[str, str]:
         """Read content and return with path."""
         if isinstance(source, str):
             # Check if it's a path or content
@@ -214,37 +214,37 @@ class BaseParser(ABC):
         else:
             # Assume it's a file-like object
             return source.read_text(encoding=self._encoding), str(source)
-    
+
     def _post_process(self, document: Document) -> Document:
         """
         Post-process document after parsing.
-        
+
         Override to add cross-references, validate, etc.
         """
         # Build heading list from blocks
         if not document.headings:
             document.headings = self._extract_headings(document)
-        
+
         # Extract code blocks from blocks
         if not document.code_blocks:
             document.code_blocks = self._extract_code_blocks(document)
-        
+
         # Extract tables from blocks
         if not document.tables:
             document.tables = self._extract_tables(document)
-        
+
         # Update stats
         document.stats.total_blocks = len(document.blocks)
         document.stats.code_blocks = len(document.code_blocks)
         document.stats.tables = len(document.tables)
         document.stats.headings = len(document.headings)
         document.stats.cross_references = len(document.internal_links) + len(document.external_links)
-        
+
         return document
-    
+
     def _extract_headings(self, document: Document) -> list:
         """Extract headings from content blocks."""
-        from .unified_structure import ContentBlockType, Heading
+        from .unified_structure import ContentBlockType
         headings = []
         for block in document.blocks:
             if block.type == ContentBlockType.HEADING:
@@ -252,7 +252,7 @@ class BaseParser(ABC):
                 if heading_data:
                     headings.append(heading_data)
         return headings
-    
+
     def _extract_code_blocks(self, document: Document) -> list:
         """Extract code blocks from content blocks."""
         code_blocks = []
@@ -260,7 +260,7 @@ class BaseParser(ABC):
             if block.metadata.get('code_data'):
                 code_blocks.append(block.metadata['code_data'])
         return code_blocks
-    
+
     def _extract_tables(self, document: Document) -> list:
         """Extract tables from content blocks."""
         tables = []
@@ -268,7 +268,7 @@ class BaseParser(ABC):
             if block.metadata.get('table_data'):
                 tables.append(block.metadata['table_data'])
         return tables
-    
+
     def _create_quality_scorer(self):
         """Create a quality scorer if enabled."""
         if self._quality_scoring:
@@ -277,44 +277,44 @@ class BaseParser(ABC):
         return None
 
 
-def get_parser_for_file(path: Union[str, Path]) -> Optional[BaseParser]:
+def get_parser_for_file(path: str | Path) -> BaseParser | None:
     """
     Get the appropriate parser for a file.
-    
+
     Args:
         path: File path
-        
+
     Returns:
         Appropriate parser instance or None
     """
     path = Path(path)
     suffix = path.suffix.lower()
-    
+
     # Try RST parser
     from .rst_parser import RstParser
     rst_parser = RstParser()
     if suffix in rst_parser.supported_extensions:
         return rst_parser
-    
+
     # Try Markdown parser
     from .markdown_parser import MarkdownParser
     md_parser = MarkdownParser()
     if suffix in md_parser.supported_extensions:
         return md_parser
-    
+
     # Could add PDF, HTML parsers here
-    
+
     return None
 
 
-def parse_document(source: Union[str, Path], format_hint: Optional[str] = None) -> ParseResult:
+def parse_document(source: str | Path, format_hint: str | None = None) -> ParseResult:
     """
     Parse a document, auto-detecting the format.
-    
+
     Args:
         source: File path or content string
         format_hint: Optional format hint ('rst', 'markdown', etc.)
-        
+
     Returns:
         ParseResult
     """
@@ -326,21 +326,21 @@ def parse_document(source: Union[str, Path], format_hint: Optional[str] = None)
         elif format_hint.lower() in ('md', 'markdown'):
             from .markdown_parser import MarkdownParser
             return MarkdownParser().parse(source)
-    
+
     # Auto-detect from file extension
     parser = get_parser_for_file(source)
     if parser:
         return parser.parse(source)
-    
+
     # Try content-based detection
     content = source if isinstance(source, str) else Path(source).read_text()
-    
+
     # Check for RST indicators
     rst_indicators = ['.. ', '::\n', ':ref:`', '.. toctree::', '.. code-block::']
     if any(ind in content for ind in rst_indicators):
         from .rst_parser import RstParser
         return RstParser().parse_string(content)
-    
+
     # Default to Markdown
     from .markdown_parser import MarkdownParser
     return MarkdownParser().parse_string(content)
diff --git a/src/skill_seekers/cli/parsers/extractors/formatters.py b/src/skill_seekers/cli/parsers/extractors/formatters.py
index db92f5f..5f4cc4e 100644
--- a/src/skill_seekers/cli/parsers/extractors/formatters.py
+++ b/src/skill_seekers/cli/parsers/extractors/formatters.py
@@ -7,45 +7,44 @@ Convert unified Document structure to various output formats.
 from typing import Any
 
 from .unified_structure import (
-    Document, ContentBlock, ContentBlockType, CrossRefType,
-    AdmonitionType, ListType, Table, CodeBlock
+    Document, ContentBlock, ContentBlockType, AdmonitionType, ListType, Table
 )
 
 
 class MarkdownFormatter:
     """Format Document as Markdown."""
-    
+
     def __init__(self, options: dict[str, Any] = None):
         self.options = options or {}
         self.include_toc = self.options.get('include_toc', False)
         self.max_heading_level = self.options.get('max_heading_level', 6)
         self.code_block_style = self.options.get('code_block_style', 'fenced')
         self.table_style = self.options.get('table_style', 'github')
-    
+
     def format(self, document: Document) -> str:
         """Convert document to markdown string."""
         parts = []
-        
+
         # Title
         if document.title:
             parts.append(f"# {document.title}\n")
-        
+
         # Metadata as YAML frontmatter
         if document.meta:
             parts.append(self._format_metadata(document.meta))
-        
+
         # Table of contents
         if self.include_toc and document.headings:
             parts.append(self._format_toc(document.headings))
-        
+
         # Content blocks
         for block in document.blocks:
             formatted = self._format_block(block)
             if formatted:
                 parts.append(formatted)
-        
+
         return '\n'.join(parts)
-    
+
     def _format_metadata(self, meta: dict) -> str:
         """Format metadata as YAML frontmatter."""
         lines = ['---']
@@ -58,7 +57,7 @@ class MarkdownFormatter:
                 lines.append(f"{key}: {value}")
         lines.append('---\n')
         return '\n'.join(lines)
-    
+
     def _format_toc(self, headings: list) -> str:
         """Format table of contents."""
         lines = ['## Table of Contents\n']
@@ -69,7 +68,7 @@ class MarkdownFormatter:
                 lines.append(f"{indent}- [{h.text}](#{anchor})")
         lines.append('')
         return '\n'.join(lines)
-    
+
     def _format_block(self, block: ContentBlock) -> str:
         """Format a single content block."""
         handlers = {
@@ -86,14 +85,14 @@ class MarkdownFormatter:
             ContentBlockType.DEFINITION_LIST: self._format_definition_list,
             ContentBlockType.META: self._format_meta,
         }
-        
+
         handler = handlers.get(block.type)
         if handler:
             return handler(block)
-        
+
         # Default: return content as-is
         return block.content + '\n'
-    
+
     def _format_heading(self, block: ContentBlock) -> str:
         """Format heading block."""
         heading_data = block.metadata.get('heading_data')
@@ -103,87 +102,84 @@ class MarkdownFormatter:
         else:
             level = block.metadata.get('level', 1)
             text = block.content
-        
+
         if level > self.max_heading_level:
             return f"**{text}**\n"
-        
+
         return f"{'#' * level} {text}\n"
-    
+
     def _format_paragraph(self, block: ContentBlock) -> str:
         """Format paragraph block."""
         return block.content + '\n'
-    
+
     def _format_code_block(self, block: ContentBlock) -> str:
         """Format code block."""
         code_data = block.metadata.get('code_data')
-        
+
         if code_data:
             code = code_data.code
             lang = code_data.language or ''
         else:
             code = block.content
             lang = block.metadata.get('language', '')
-        
+
         if self.code_block_style == 'fenced':
             return f"```{lang}\n{code}\n```\n"
         else:
             # Indented style
             indented = '\n'.join('    ' + line for line in code.split('\n'))
             return indented + '\n'
-    
+
     def _format_table(self, block: ContentBlock) -> str:
         """Format table block."""
         table_data = block.metadata.get('table_data')
         if not table_data:
             return ''
-        
+
         return self._format_table_data(table_data)
-    
+
     def _format_table_data(self, table: Table) -> str:
         """Format table data as markdown."""
         if not table.rows:
             return ''
-        
+
         lines = []
-        
+
         # Caption
         if table.caption:
             lines.append(f"**{table.caption}**\n")
-        
+
         # Headers
         headers = table.headers or table.rows[0]
         lines.append('| ' + ' | '.join(headers) + ' |')
         lines.append('|' + '|'.join('---' for _ in headers) + '|')
-        
+
         # Rows (skip first if used as headers)
         start_row = 0 if table.headers else 1
         for row in table.rows[start_row:]:
             # Pad row to match header count
             padded_row = row + [''] * (len(headers) - len(row))
             lines.append('| ' + ' | '.join(padded_row[:len(headers)]) + ' |')
-        
+
         lines.append('')
         return '\n'.join(lines)
-    
+
     def _format_list(self, block: ContentBlock) -> str:
         """Format list block."""
         list_type = block.metadata.get('list_type', ListType.BULLET)
         items = block.metadata.get('items', [])
-        
+
         if not items:
             return block.content + '\n'
-        
+
         lines = []
         for i, item in enumerate(items):
-            if list_type == ListType.NUMBERED:
-                prefix = f"{i + 1}."
-            else:
-                prefix = "-"
+            prefix = f"{i + 1}." if list_type == ListType.NUMBERED else "-"
             lines.append(f"{prefix} {item}")
-        
+
         lines.append('')
         return '\n'.join(lines)
-    
+
     def _format_image(self, block: ContentBlock) -> str:
         """Format image block."""
         image_data = block.metadata.get('image_data')
@@ -193,9 +189,9 @@ class MarkdownFormatter:
         else:
             src = block.metadata.get('src', '')
             alt = block.metadata.get('alt', '')
-        
+
         return f"![{alt}]({src})\n"
-    
+
     def _format_cross_ref(self, block: ContentBlock) -> str:
         """Format cross-reference block."""
         xref_data = block.metadata.get('xref_data')
@@ -203,13 +199,13 @@ class MarkdownFormatter:
             text = xref_data.text or xref_data.target
             target = xref_data.target
             return f"[{text}](#{target})\n"
-        
+
         return block.content + '\n'
-    
+
     def _format_admonition(self, block: ContentBlock) -> str:
         """Format admonition/callout block."""
         admonition_type = block.metadata.get('admonition_type', AdmonitionType.NOTE)
-        
+
         # GitHub-style admonitions
         type_map = {
             AdmonitionType.NOTE: 'NOTE',
@@ -218,16 +214,16 @@ class MarkdownFormatter:
             AdmonitionType.IMPORTANT: 'IMPORTANT',
             AdmonitionType.CAUTION: 'CAUTION',
         }
-        
+
         type_str = type_map.get(admonition_type, 'NOTE')
         content = block.content
-        
+
         return f"> [!{type_str}]\n> {content.replace(chr(10), chr(10) + '> ')}\n"
-    
+
     def _format_directive(self, block: ContentBlock) -> str:
         """Format directive block (RST-specific)."""
         directive_name = block.metadata.get('directive_name', 'unknown')
-        
+
         # Format as a blockquote with directive name
         content = block.content
         lines = [f"> **{directive_name}**"]
@@ -235,13 +231,13 @@ class MarkdownFormatter:
             lines.append(f"> {line}")
         lines.append('')
         return '\n'.join(lines)
-    
+
     def _format_field_list(self, block: ContentBlock) -> str:
         """Format field list block."""
         fields = block.metadata.get('fields', [])
         if not fields:
             return block.content + '\n'
-        
+
         lines = []
         for field in fields:
             if field.arg:
@@ -250,13 +246,13 @@ class MarkdownFormatter:
                 lines.append(f"**{field.name}**: {field.content}")
         lines.append('')
         return '\n'.join(lines)
-    
+
     def _format_definition_list(self, block: ContentBlock) -> str:
         """Format definition list block."""
         items = block.metadata.get('items', [])
         if not items:
             return block.content + '\n'
-        
+
         lines = []
         for item in items:
             if item.classifier:
@@ -266,7 +262,7 @@ class MarkdownFormatter:
             lines.append(f": {item.definition}")
         lines.append('')
         return '\n'.join(lines)
-    
+
     def _format_meta(self, block: ContentBlock) -> str:
         """Format metadata block (usually filtered out)."""
         return ''  # Metadata goes in YAML frontmatter
@@ -274,7 +270,7 @@ class MarkdownFormatter:
 
 class SkillFormatter:
     """Format Document for skill-seekers internal use."""
-    
+
     def format(self, document: Document) -> dict[str, Any]:
         """Format document for skill output."""
         return {
@@ -324,7 +320,7 @@ class SkillFormatter:
                 "processing_time_ms": document.stats.processing_time_ms,
             }
         }
-    
+
     def _extract_summary(self, document: Document, max_length: int = 500) -> str:
         """Extract a text summary from the document."""
         paragraphs = []
@@ -333,22 +329,22 @@ class SkillFormatter:
                 paragraphs.append(block.content)
                 if len(' '.join(paragraphs)) > max_length:
                     break
-        
+
         summary = ' '.join(paragraphs)
         if len(summary) > max_length:
             summary = summary[:max_length - 3] + '...'
-        
+
         return summary
-    
+
     def _score_table(self, table: Table) -> float:
         """Quick table quality score."""
         if not table.rows:
             return 0.0
-        
+
         score = 5.0
         if table.headers:
             score += 2.0
         if 2 <= len(table.rows) <= 50:
             score += 1.0
-        
+
         return min(10.0, score)
diff --git a/src/skill_seekers/cli/parsers/extractors/markdown_parser.py b/src/skill_seekers/cli/parsers/extractors/markdown_parser.py
index 4d68d47..e357569 100644
--- a/src/skill_seekers/cli/parsers/extractors/markdown_parser.py
+++ b/src/skill_seekers/cli/parsers/extractors/markdown_parser.py
@@ -17,13 +17,12 @@ Enhanced with quality scoring and table support.
 """
 
 import re
-from pathlib import Path
-from typing import Any, Optional
+from typing import Any
 
 from .base_parser import BaseParser
 from .unified_structure import (
     Document, ContentBlock, ContentBlockType, CrossReference, CrossRefType,
-    AdmonitionType, Heading, CodeBlock, Table, Image, ListType, ExtractionStats
+    AdmonitionType, Heading, CodeBlock, Table, Image, ListType
 )
 from .quality_scorer import QualityScorer
 
@@ -31,10 +30,10 @@ from .quality_scorer import QualityScorer
 class MarkdownParser(BaseParser):
     """
     Parser for Markdown documents.
-    
+
     Supports standard Markdown and GitHub-flavored Markdown (GFM).
     """
-    
+
     # Admonition types for GitHub-style callouts
     ADMONITION_TYPES = {
         'note': AdmonitionType.NOTE,
@@ -46,21 +45,21 @@ class MarkdownParser(BaseParser):
         'danger': AdmonitionType.DANGER,
         'attention': AdmonitionType.ATTENTION,
     }
-    
-    def __init__(self, options: Optional[dict[str, Any]] = None):
+
+    def __init__(self, options: dict[str, Any] | None = None):
         super().__init__(options)
         self.quality_scorer = QualityScorer()
         self._lines: list[str] = []
         self._current_line = 0
-    
+
     @property
     def format_name(self) -> str:
         return 'markdown'
-    
+
     @property
     def supported_extensions(self) -> list[str]:
         return ['.md', '.markdown', '.mdown', '.mkd']
-    
+
     def _detect_format(self, content: str) -> bool:
         """Detect if content is Markdown."""
         md_indicators = [
@@ -71,34 +70,31 @@ class MarkdownParser(BaseParser):
             r'^\s*[-*+]\s+\S',  # Lists
             r'^>\s+\S',  # Blockquotes
         ]
-        for pattern in md_indicators:
-            if re.search(pattern, content, re.MULTILINE):
-                return True
-        return False
-    
+        return any(re.search(pattern, content, re.MULTILINE) for pattern in md_indicators)
+
     def _parse_content(self, content: str, source_path: str) -> Document:
         """Parse Markdown content into Document."""
         self._lines = content.split('\n')
         self._current_line = 0
-        
+
         document = Document(
             title='',
             format='markdown',
             source_path=source_path,
         )
-        
+
         # Parse frontmatter if present
         frontmatter = self._parse_frontmatter()
         if frontmatter:
             document.meta.update(frontmatter)
-        
+
         # Parse content blocks
         while self._current_line < len(self._lines):
             block = self._parse_block()
             if block:
                 document.blocks.append(block)
             self._current_line += 1
-        
+
         # Extract title from first h1 or frontmatter
         if document.meta.get('title'):
             document.title = document.meta['title']
@@ -109,55 +105,55 @@ class MarkdownParser(BaseParser):
                     if heading_data and heading_data.level == 1:
                         document.title = heading_data.text
                         break
-        
+
         # Extract specialized content
         self._extract_specialized_content(document)
-        
+
         return document
-    
-    def _parse_frontmatter(self) -> Optional[dict]:
+
+    def _parse_frontmatter(self) -> dict | None:
         """Parse YAML frontmatter if present."""
         if self._current_line >= len(self._lines):
             return None
-        
+
         first_line = self._lines[self._current_line].strip()
         if first_line != '---':
             return None
-        
+
         # Find closing ---
         end_line = None
         for i in range(self._current_line + 1, len(self._lines)):
             if self._lines[i].strip() == '---':
                 end_line = i
                 break
-        
+
         if end_line is None:
             return None
-        
+
         # Extract frontmatter content
         frontmatter_lines = self._lines[self._current_line + 1:end_line]
-        frontmatter_content = '\n'.join(frontmatter_lines)
-        
+        # Parsed line by line below, so no joined copy of the frontmatter is needed
+
         # Simple key: value parsing (not full YAML)
         meta = {}
         current_key = None
         current_value = []
-        
+
         for line in frontmatter_lines:
             stripped = line.strip()
             if not stripped:
                 continue
-            
+
             # Check for new key
             match = re.match(r'^(\w+):\s*(.*)$', stripped)
             if match:
                 # Save previous key
                 if current_key:
                     meta[current_key] = '\n'.join(current_value).strip()
-                
+
                 current_key = match.group(1)
                 value = match.group(2)
-                
+
                 # Handle inline value
                 if value:
                     # Check if it's a list
@@ -178,146 +174,146 @@ class MarkdownParser(BaseParser):
                 meta[current_key].append(stripped[2:].strip().strip('"\''))
             elif current_key:
                 current_value.append(stripped)
-        
+
         # Save last key
         if current_key:
             meta[current_key] = '\n'.join(current_value).strip()
-        
+
         # Advance past frontmatter
         self._current_line = end_line + 1
-        
+
         return meta
-    
-    def _parse_block(self) -> Optional[ContentBlock]:
+
+    def _parse_block(self) -> ContentBlock | None:
         """Parse a single block at current position."""
         line = self._current_line
         if line >= len(self._lines):
             return None
-        
+
         current = self._lines[line]
         stripped = current.strip()
-        
+
         # Skip empty lines
         if not stripped:
             return None
-        
+
         # Skip HTML comments
         if stripped.startswith('<!--'):
             return self._parse_html_comment()
-        
+
         # ATX Headers
         if stripped.startswith('#'):
             return self._parse_atx_header()
-        
+
         # Setext headers (underline style)
         if self._is_setext_header(line):
             return self._parse_setext_header()
-        
+
         # Code fence
         if stripped.startswith('```'):
             return self._parse_code_fence()
-        
+
         # Indented code block
         if current.startswith('    ') or current.startswith('\t'):
             return self._parse_indented_code()
-        
+
         # Table
         if '|' in stripped and self._is_table(line):
             return self._parse_table()
-        
+
         # Blockquote (check for admonition)
         if stripped.startswith('>'):
             return self._parse_blockquote()
-        
+
         # Horizontal rule
         if re.match(r'^[\-*_]{3,}\s*$', stripped):
             return self._parse_horizontal_rule()
-        
+
         # List
         list_type = self._detect_list_type(stripped)
         if list_type:
             return self._parse_list(list_type)
-        
+
         # Paragraph (default)
         return self._parse_paragraph()
-    
+
     def _is_setext_header(self, line: int) -> bool:
         """Check if current line is a Setext header."""
         if line + 1 >= len(self._lines):
             return False
-        
+
         current = self._lines[line].strip()
         next_line = self._lines[line + 1].strip()
-        
+
         if not current or not next_line:
             return False
-        
+
         # H1: ===, H2: ---
         return re.match(r'^[=-]+$', next_line) is not None
-    
+
     def _parse_atx_header(self) -> ContentBlock:
         """Parse ATX style header (# Header)."""
         line = self._lines[self._current_line]
         match = re.match(r'^(#{1,6})\s+(.+)$', line.strip())
-        
+
         if match:
             level = len(match.group(1))
             text = match.group(2).strip()
             # Remove trailing hashes
             text = re.sub(r'\s+#+$', '', text)
-            
+
             anchor = self._create_anchor(text)
-            
+
             heading = Heading(
                 level=level,
                 text=text,
                 id=anchor,
                 source_line=self._current_line + 1,
             )
-            
+
             return ContentBlock(
                 type=ContentBlockType.HEADING,
                 content=text,
                 metadata={'heading_data': heading},
                 source_line=self._current_line + 1,
             )
-        
+
         return self._parse_paragraph()
-    
+
     def _parse_setext_header(self) -> ContentBlock:
         """Parse Setext style header (underline)."""
         text = self._lines[self._current_line].strip()
         underline = self._lines[self._current_line + 1].strip()
-        
+
         level = 1 if underline[0] == '=' else 2
         anchor = self._create_anchor(text)
-        
+
         heading = Heading(
             level=level,
             text=text,
             id=anchor,
             source_line=self._current_line + 1,
         )
-        
+
         # Skip underline
         self._current_line += 1
-        
+
         return ContentBlock(
             type=ContentBlockType.HEADING,
             content=text,
             metadata={'heading_data': heading},
             source_line=self._current_line,
         )
-    
+
     def _parse_code_fence(self) -> ContentBlock:
         """Parse fenced code block."""
         line = self._lines[self._current_line]
         match = re.match(r'^```(\w+)?\s*$', line.strip())
         language = match.group(1) if match else None
-        
+
         start_line = self._current_line
         self._current_line += 1
-        
+
         code_lines = []
         while self._current_line < len(self._lines):
             current_line = self._lines[self._current_line]
@@ -325,19 +321,19 @@ class MarkdownParser(BaseParser):
                 break
             code_lines.append(current_line)
             self._current_line += 1
-        
+
         code = '\n'.join(code_lines)
-        
+
         # Detect language if not specified
         detected_lang, confidence = self.quality_scorer.detect_language(code)
         if not language and confidence > 0.6:
             language = detected_lang
         elif not language:
             language = 'text'
-        
+
         # Score code quality
         quality = self.quality_scorer.score_code_block(code, language)
-        
+
         code_block = CodeBlock(
             code=code,
             language=language,
@@ -345,7 +341,7 @@ class MarkdownParser(BaseParser):
             confidence=confidence if language == detected_lang else 1.0,
             source_line=start_line + 1,
         )
-        
+
         return ContentBlock(
             type=ContentBlockType.CODE_BLOCK,
             content=code,
@@ -356,19 +352,19 @@ class MarkdownParser(BaseParser):
             source_line=start_line + 1,
             quality_score=quality,
         )
-    
+
     def _parse_indented_code(self) -> ContentBlock:
         """Parse indented code block."""
         code_lines = []
         start_line = self._current_line
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             if not line.strip():
                 code_lines.append('')
                 self._current_line += 1
                 continue
-            
+
             if line.startswith('    '):
                 code_lines.append(line[4:])
             elif line.startswith('\t'):
@@ -376,15 +372,15 @@ class MarkdownParser(BaseParser):
             else:
                 self._current_line -= 1
                 break
-            
+
             self._current_line += 1
-        
+
         code = '\n'.join(code_lines).rstrip()
-        
+
         # Detect language
         detected_lang, confidence = self.quality_scorer.detect_language(code)
         quality = self.quality_scorer.score_code_block(code, detected_lang)
-        
+
         code_block = CodeBlock(
             code=code,
             language=detected_lang if confidence > 0.6 else 'text',
@@ -392,7 +388,7 @@ class MarkdownParser(BaseParser):
             confidence=confidence,
             source_line=start_line + 1,
         )
-        
+
         return ContentBlock(
             type=ContentBlockType.CODE_BLOCK,
             content=code,
@@ -403,52 +399,49 @@ class MarkdownParser(BaseParser):
             source_line=start_line + 1,
             quality_score=quality,
         )
-    
+
     def _is_table(self, line: int) -> bool:
         """Check if current position is a table."""
         if line + 1 >= len(self._lines):
             return False
-        
+
         current = self._lines[line].strip()
         next_line = self._lines[line + 1].strip()
-        
+
         # Check for table separator line
-        if re.match(r'^[\|:-]+$', next_line) and '|' in current:
-            return True
-        
-        return False
-    
+        return bool(re.match(r'^[\|:-]+$', next_line) and '|' in current)
+
     def _parse_table(self) -> ContentBlock:
         """Parse a GFM table."""
         rows = []
         headers = None
         start_line = self._current_line
-        
+
         # Parse header row
         header_line = self._lines[self._current_line].strip()
         headers = [cell.strip() for cell in header_line.split('|')]
         headers = [h for h in headers if h]  # Remove empty
         self._current_line += 1
-        
+
         # Skip separator line (|:--:| etc.)
         if self._current_line < len(self._lines):
             self._current_line += 1
-        
+
         # Parse data rows
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line].strip()
-            
+
             if not line or '|' not in line:
                 self._current_line -= 1
                 break
-            
+
             cells = [cell.strip() for cell in line.split('|')]
             cells = [c for c in cells if c]
             if cells:
                 rows.append(cells)
-            
+
             self._current_line += 1
-        
+
         table = Table(
             rows=rows,
             headers=headers,
@@ -456,9 +449,9 @@ class MarkdownParser(BaseParser):
             source_format='markdown',
             source_line=start_line + 1,
         )
-        
+
         quality = self.quality_scorer.score_table(table)
-        
+
         return ContentBlock(
             type=ContentBlockType.TABLE,
             content=f"[Table: {len(rows)} rows]",
@@ -466,25 +459,25 @@ class MarkdownParser(BaseParser):
             source_line=start_line + 1,
             quality_score=quality,
         )
-    
+
     def _parse_blockquote(self) -> ContentBlock:
         """Parse a blockquote, checking for admonitions."""
         lines = []
         start_line = self._current_line
         admonition_type = None
         admonition_content = []
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             stripped = line.strip()
-            
+
             if not stripped.startswith('>'):
                 self._current_line -= 1
                 break
-            
+
             # Remove > prefix
             content = line[1:].strip() if line.startswith('> ') else line[1:].strip()
-            
+
             # Check for GitHub-style admonition: > [!NOTE]
             admonition_match = re.match(r'^\[!([\w]+)\]\s*(.*)$', content)
             if admonition_match and not admonition_type:
@@ -497,9 +490,9 @@ class MarkdownParser(BaseParser):
                 admonition_content.append(content)
             else:
                 lines.append(content)
-            
+
             self._current_line += 1
-        
+
         # Return as admonition if detected
         if admonition_type:
             return ContentBlock(
@@ -508,7 +501,7 @@ class MarkdownParser(BaseParser):
                 metadata={'admonition_type': admonition_type},
                 source_line=start_line + 1,
             )
-        
+
         # Regular blockquote
         content = '\n'.join(lines)
         return ContentBlock(
@@ -517,24 +510,23 @@ class MarkdownParser(BaseParser):
             metadata={'block_type': 'blockquote'},
             source_line=start_line + 1,
         )
-    
-    def _parse_html_comment(self) -> Optional[ContentBlock]:
+
+    def _parse_html_comment(self) -> ContentBlock | None:
         """Parse HTML comment (usually skip)."""
-        start_line = self._current_line
         content_lines = []
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             content_lines.append(line)
-            
+
             if '-->' in line:
                 break
-            
+
             self._current_line += 1
-        
+
         # Skip comments in output (could optionally include)
         return None
-    
+
     def _parse_horizontal_rule(self) -> ContentBlock:
         """Parse horizontal rule."""
         return ContentBlock(
@@ -543,28 +535,28 @@ class MarkdownParser(BaseParser):
             metadata={'element': 'horizontal_rule'},
             source_line=self._current_line + 1,
         )
-    
-    def _detect_list_type(self, stripped: str) -> Optional[ListType]:
+
+    def _detect_list_type(self, stripped: str) -> ListType | None:
         """Detect if line starts a list and which type."""
         if re.match(r'^[-*+]\s+', stripped):
             return ListType.BULLET
         if re.match(r'^\d+\.\s+', stripped):
             return ListType.NUMBERED
         return None
-    
+
     def _parse_list(self, list_type: ListType) -> ContentBlock:
         """Parse a list."""
         items = []
         start_line = self._current_line
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             stripped = line.strip()
-            
+
             if not stripped:
                 self._current_line += 1
                 continue
-            
+
             # Check if still in list
             if list_type == ListType.BULLET:
                 match = re.match(r'^[-*+]\s+(.+)$', stripped)
@@ -578,9 +570,9 @@ class MarkdownParser(BaseParser):
                     self._current_line -= 1
                     break
                 items.append(match.group(1))
-            
+
             self._current_line += 1
-        
+
         return ContentBlock(
             type=ContentBlockType.LIST,
             content=f"{len(items)} items",
@@ -590,20 +582,20 @@ class MarkdownParser(BaseParser):
             },
             source_line=start_line + 1,
         )
-    
+
     def _parse_paragraph(self) -> ContentBlock:
         """Parse a paragraph."""
         lines = []
         start_line = self._current_line
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             stripped = line.strip()
-            
+
             # End of paragraph
             if not stripped:
                 break
-            
+
             # Check for block-level elements
             if stripped.startswith('#'):
                 break
@@ -619,45 +611,45 @@ class MarkdownParser(BaseParser):
                 break
             if self._is_setext_header(self._current_line):
                 break
-            
+
             lines.append(stripped)
             self._current_line += 1
-        
+
         content = ' '.join(lines)
-        
+
         # Process inline elements
         content = self._process_inline(content)
-        
+
         return ContentBlock(
             type=ContentBlockType.PARAGRAPH,
             content=content,
             source_line=start_line + 1,
         )
-    
+
     def _process_inline(self, text: str) -> str:
         """Process inline Markdown elements."""
         # Links [text](url)
         text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'[\1](\2)', text)
-        
+
         # Images ![alt](url)
         text = re.sub(r'!\[([^\]]*)\]\(([^)]+)\)', r'![\1](\2)', text)
-        
+
         # Code `code`
         text = re.sub(r'`([^`]+)`', r'`\1`', text)
-        
+
         # Bold **text** or __text__
         text = re.sub(r'\*\*([^*]+)\*\*', r'**\1**', text)
         text = re.sub(r'__([^_]+)__', r'**\1**', text)
-        
+
         # Italic *text* or _text_
         text = re.sub(r'(?<!\*)\*([^*]+)\*(?!\*)', r'*\1*', text)
         text = re.sub(r'(?<!_)_([^_]+)_(?!_)', r'*\1*', text)
-        
+
         # Strikethrough ~~text~~
         text = re.sub(r'~~([^~]+)~~', r'~~\1~~', text)
-        
+
         return text
-    
+
     def _create_anchor(self, text: str) -> str:
         """Create URL anchor from heading text."""
         anchor = text.lower()
@@ -665,7 +657,7 @@ class MarkdownParser(BaseParser):
         anchor = anchor.replace(' ', '-')
         anchor = re.sub(r'-+', '-', anchor)
         return anchor.strip('-')
-    
+
     def _extract_specialized_content(self, document: Document):
         """Extract specialized content lists from blocks."""
         for block in document.blocks:
@@ -674,19 +666,19 @@ class MarkdownParser(BaseParser):
                 heading_data = block.metadata.get('heading_data')
                 if heading_data:
                     document.headings.append(heading_data)
-            
+
             # Extract code blocks
             elif block.type == ContentBlockType.CODE_BLOCK:
                 code_data = block.metadata.get('code_data')
                 if code_data:
                     document.code_blocks.append(code_data)
-            
+
             # Extract tables
             elif block.type == ContentBlockType.TABLE:
                 table_data = block.metadata.get('table_data')
                 if table_data:
                     document.tables.append(table_data)
-            
+
             # Extract images from paragraphs (simplified)
             elif block.type == ContentBlockType.PARAGRAPH:
                 content = block.content
@@ -698,7 +690,7 @@ class MarkdownParser(BaseParser):
                         source_line=block.source_line,
                     )
                     document.images.append(image)
-                
+
                 # Extract links
                 link_matches = re.findall(r'\[([^\]]+)\]\(([^)]+)\)', content)
                 for text, url in link_matches:
@@ -709,14 +701,14 @@ class MarkdownParser(BaseParser):
                         ref_type = CrossRefType.EXTERNAL
                     else:
                         ref_type = CrossRefType.INTERNAL
-                    
+
                     xref = CrossReference(
                         ref_type=ref_type,
                         target=url,
                         text=text,
                         source_line=block.source_line,
                     )
-                    
+
                     if ref_type == CrossRefType.EXTERNAL:
                         document.external_links.append(xref)
                     else:
diff --git a/src/skill_seekers/cli/parsers/extractors/pdf_parser.py b/src/skill_seekers/cli/parsers/extractors/pdf_parser.py
index 0d260ae..4490b51 100644
--- a/src/skill_seekers/cli/parsers/extractors/pdf_parser.py
+++ b/src/skill_seekers/cli/parsers/extractors/pdf_parser.py
@@ -5,7 +5,7 @@ Wraps PDFExtractor to provide unified Document output.
 """
 
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any
 
 from .base_parser import BaseParser, ParseResult
 from .quality_scorer import QualityScorer
@@ -14,7 +14,6 @@ from .unified_structure import (
     ContentBlock,
     ContentBlockType,
     Document,
-    ExtractionStats,
     Heading,
     Image,
     Table,
@@ -33,13 +32,13 @@ except ImportError:
 class PdfParser(BaseParser):
     """
     Parser for PDF documents.
-    
+
     Wraps the existing PDFExtractor to provide unified Document output
     while maintaining all PDF-specific features (OCR, image extraction,
     table extraction, etc.).
     """
 
-    def __init__(self, options: Optional[dict[str, Any]] = None):
+    def __init__(self, options: dict[str, Any] | None = None):
         super().__init__(options)
         self.pdf_options = {
             "verbose": self.options.get("verbose", False),
@@ -71,7 +70,7 @@ class PdfParser(BaseParser):
     def _parse_content(self, content: str, source_path: str) -> Document:
         """
         Parse PDF content into Document.
-        
+
         Note: For PDF, we need the file path, not content string.
         This method is mainly for API compatibility.
         """
@@ -83,10 +82,10 @@ class PdfParser(BaseParser):
     def parse_file(self, path: str | Path) -> ParseResult:
         """
         Parse a PDF file.
-        
+
         Args:
             path: Path to PDF file
-            
+
         Returns:
             ParseResult with Document or error info
         """
@@ -97,7 +96,7 @@ class PdfParser(BaseParser):
             result.errors.append(f"File not found: {path}")
             return result
 
-        if not path.suffix.lower() == ".pdf":
+        if path.suffix.lower() != ".pdf":
             result.errors.append(f"Not a PDF file: {path}")
             return result
 
@@ -127,7 +126,7 @@ class PdfParser(BaseParser):
 
             # Convert to unified Document
             document = self._convert_to_document(extraction_result, str(path))
-            
+
             result.document = document
             result.success = True
             result.warnings.extend(document.stats.warnings)
@@ -157,13 +156,13 @@ class PdfParser(BaseParser):
 
         # Process pages
         pages = extraction_result.get("pages", [])
-        
+
         for page_num, page_data in enumerate(pages):
             # Add page heading
             page_heading = f"Page {page_num + 1}"
             if page_data.get("headings"):
                 page_heading = page_data["headings"][0].get("text", page_heading)
-            
+
             document.blocks.append(
                 ContentBlock(
                     type=ContentBlockType.HEADING,
@@ -200,7 +199,7 @@ class PdfParser(BaseParser):
                     source_line=page_num + 1,
                 )
                 document.code_blocks.append(code_block)
-                
+
                 document.blocks.append(
                     ContentBlock(
                         type=ContentBlockType.CODE_BLOCK,
@@ -224,7 +223,7 @@ class PdfParser(BaseParser):
                     source_line=page_num + 1,
                 )
                 document.tables.append(table)
-                
+
                 quality = self.quality_scorer.score_table(table)
                 document.blocks.append(
                     ContentBlock(
@@ -268,7 +267,7 @@ class PdfParser(BaseParser):
     def parse(self, source: str | Path) -> ParseResult:
         """
         Parse PDF from source.
-        
+
         For PDF files, source should be a file path.
         """
         if isinstance(source, str) and Path(source).exists():
diff --git a/src/skill_seekers/cli/parsers/extractors/quality_scorer.py b/src/skill_seekers/cli/parsers/extractors/quality_scorer.py
index f2bc836..4c377d6 100644
--- a/src/skill_seekers/cli/parsers/extractors/quality_scorer.py
+++ b/src/skill_seekers/cli/parsers/extractors/quality_scorer.py
@@ -8,14 +8,13 @@ Provides consistent quality scoring across all parsers for:
 """
 
 import re
-from typing import Optional
 
-from .unified_structure import CodeBlock, Table, ContentBlock
+from .unified_structure import Table, ContentBlock
 
 
 class QualityScorer:
     """Score the quality of extracted content."""
-    
+
     # Language patterns for detection and validation
     LANGUAGE_PATTERNS = {
         'python': {
@@ -122,26 +121,26 @@ class QualityScorer:
             ],
         },
     }
-    
-    def score_code_block(self, code: str, language: Optional[str] = None) -> float:
+
+    def score_code_block(self, code: str, language: str | None = None) -> float:
         """
         Score a code block for quality (0-10).
-        
+
         Args:
             code: The code content
             language: Detected or specified language
-            
+
         Returns:
             Quality score from 0-10
         """
         score = 5.0  # Start neutral
-        
+
         if not code or not code.strip():
             return 0.0
-        
+
         code = code.strip()
-        lines = [l for l in code.split('\n') if l.strip()]
-        
+        lines = [line for line in code.split('\n') if line.strip()]
+
         # Factor 1: Length appropriateness
         code_len = len(code)
         if 50 <= code_len <= 1000:
@@ -150,22 +149,22 @@ class QualityScorer:
             score -= 1.0  # Too long
         elif code_len < 20:
             score -= 2.0  # Too short
-        
+
         # Factor 2: Line count
         if 3 <= len(lines) <= 50:
             score += 1.0
         elif len(lines) > 100:
             score -= 0.5
-        
+
         # Factor 3: Language-specific validation
         if language and language in self.LANGUAGE_PATTERNS:
             lang_patterns = self.LANGUAGE_PATTERNS[language]
-            
+
             # Check for keywords
             keyword_matches = sum(1 for kw in lang_patterns['keywords'] if kw in code)
             if keyword_matches >= 2:
                 score += 1.0
-            
+
             # Check for syntax patterns
             syntax_matches = sum(
                 1 for pattern, _ in lang_patterns['syntax_checks']
@@ -173,27 +172,27 @@ class QualityScorer:
             )
             if syntax_matches >= 1:
                 score += 1.0
-        
+
         # Factor 4: Structural quality
         # Check for function/class definitions
         if re.search(r'\b(def|function|func|fn|class|public class)\b', code):
             score += 1.5
-        
+
         # Check for meaningful variable names (not just x, y, i)
         meaningful_vars = re.findall(r'\b[a-z_][a-z0-9_]{3,}\b', code.lower())
         if len(meaningful_vars) >= 3:
             score += 0.5
-        
+
         # Factor 5: Syntax validation (generic)
         is_valid, issues = self._validate_syntax(code, language)
         if is_valid:
             score += 1.0
         else:
             score -= len(issues) * 0.3
-        
+
         # Factor 6: Comment/code ratio
         comment_lines = sum(
-            1 for line in lines 
+            1 for line in lines
             if line.strip().startswith(('#', '//', '/*', '*', '--', '<!--'))
         )
         if len(lines) > 0:
@@ -202,14 +201,14 @@ class QualityScorer:
                 score += 0.5  # Good comment ratio
             elif comment_ratio > 0.6:
                 score -= 1.0  # Too many comments
-        
+
         # Clamp to 0-10
         return max(0.0, min(10.0, score))
-    
-    def _validate_syntax(self, code: str, language: Optional[str]) -> tuple[bool, list[str]]:
+
+    def _validate_syntax(self, code: str, language: str | None) -> tuple[bool, list[str]]:
         """Basic syntax validation."""
         issues = []
-        
+
         # Check for balanced braces/brackets
         pairs = [('{', '}'), ('[', ']'), ('(', ')')]
         for open_char, close_char in pairs:
@@ -217,13 +216,13 @@ class QualityScorer:
             close_count = code.count(close_char)
             if abs(open_count - close_count) > 2:
                 issues.append(f"Unbalanced {open_char}{close_char}")
-        
+
         # Check for common natural language indicators
         common_words = ['the', 'and', 'for', 'with', 'this', 'that', 'have', 'from', 'they']
         word_count = sum(1 for word in common_words if f' {word} ' in code.lower())
         if word_count > 5 and len(code.split()) < 100:
             issues.append("May be natural language")
-        
+
         # Language-specific checks
         if language == 'python':
             # Check for mixed indentation
@@ -235,32 +234,32 @@ class QualityScorer:
                     indent_chars.add('tab')
             if len(indent_chars) > 1:
                 issues.append("Mixed tabs and spaces")
-        
+
         elif language == 'json':
             try:
                 import json
                 json.loads(code)
             except Exception as e:
                 issues.append(f"Invalid JSON: {str(e)[:50]}")
-        
+
         return len(issues) == 0, issues
-    
+
     def score_table(self, table: Table) -> float:
         """
         Score a table for quality (0-10).
-        
+
         Args:
             table: The table to score
-            
+
         Returns:
             Quality score from 0-10
         """
         score = 5.0
-        
+
         # Factor 1: Has headers
         if table.headers:
             score += 1.0
-        
+
         # Factor 2: Consistent column count
         if table.rows:
             col_counts = [len(row) for row in table.rows]
@@ -268,18 +267,18 @@ class QualityScorer:
                 score += 1.0  # Consistent
             else:
                 score -= 1.0  # Inconsistent
-        
+
         # Factor 3: Reasonable size
         if 2 <= table.num_rows <= 100:
             score += 0.5
         elif table.num_rows > 500:
             score -= 0.5
-        
+
         if 2 <= table.num_cols <= 10:
             score += 0.5
         elif table.num_cols > 20:
             score -= 0.5
-        
+
         # Factor 4: Non-empty cells
         if table.rows:
             total_cells = sum(len(row) for row in table.rows)
@@ -290,72 +289,69 @@ class QualityScorer:
                     score += 1.0
                 elif empty_ratio > 0.5:
                     score -= 1.0
-        
+
         # Factor 5: Has caption (good for API docs)
         if table.caption:
             score += 0.5
-        
+
         return max(0.0, min(10.0, score))
-    
+
     def score_content_block(self, block: ContentBlock) -> float:
         """Score a generic content block."""
         score = 5.0
         content = block.content
-        
+
         if not content:
             return 0.0
-        
+
         # Length check
         if len(content) < 10:
             score -= 2.0
         elif len(content) > 1000:
             score += 0.5
-        
+
         # Structure check
         if '.' in content:  # Has sentences
             score += 0.5
         if content[0].isupper():  # Starts with capital
             score += 0.5
-        
+
         return max(0.0, min(10.0, score))
-    
+
     def detect_language(self, code: str) -> tuple[str, float]:
         """
         Detect programming language from code.
-        
+
         Returns:
             Tuple of (language, confidence)
         """
         code = code.strip()
         if not code:
             return 'unknown', 0.0
-        
+
         scores = {}
-        
+
         for lang, patterns in self.LANGUAGE_PATTERNS.items():
             score = 0.0
-            
+
             # Check keywords
             keyword_hits = sum(1 for kw in patterns['keywords'] if kw in code)
             score += keyword_hits * 0.5
-            
+
             # Check syntax patterns
             for pattern, _ in patterns['syntax_checks']:
                 if re.search(pattern, code, re.MULTILINE):
                     score += 1.0
-            
+
             scores[lang] = score
-        
+
         if not scores:
             return 'unknown', 0.0
-        
+
         best_lang = max(scores, key=scores.get)
         best_score = scores[best_lang]
-        
+
         # Normalize confidence
-        if best_score >= 3:
-            confidence = min(1.0, best_score / 5)
-        else:
-            confidence = best_score / 10
-        
+        confidence = min(1.0, best_score / 5) if best_score >= 3 else best_score / 10
+
         return best_lang, confidence
diff --git a/src/skill_seekers/cli/parsers/extractors/rst_parser.py b/src/skill_seekers/cli/parsers/extractors/rst_parser.py
index 4a493ec..92f5bce 100644
--- a/src/skill_seekers/cli/parsers/extractors/rst_parser.py
+++ b/src/skill_seekers/cli/parsers/extractors/rst_parser.py
@@ -17,14 +17,13 @@ Optimized for Godot documentation parsing.
 """
 
 import re
-from pathlib import Path
-from typing import Any, Optional
+from typing import Any
 
 from .base_parser import BaseParser
 from .unified_structure import (
     Document, ContentBlock, ContentBlockType, CrossReference, CrossRefType,
     AdmonitionType, Heading, CodeBlock, Table, Field, DefinitionItem,
-    Image, ListType, ExtractionStats
+    Image, ListType
 )
 from .quality_scorer import QualityScorer
 
@@ -32,13 +31,13 @@ from .quality_scorer import QualityScorer
 class RstParser(BaseParser):
     """
     Parser for ReStructuredText documents.
-    
+
     Handles standard RST as well as Godot-specific extensions.
     """
-    
+
     # RST header underline characters (in order of level)
     HEADER_CHARS = ['=', '-', '~', '^', '"', "'", '`', ':', '.', '_', '*', '+', '#']
-    
+
     # Admonition directives
     ADMONITION_DIRECTIVES = {
         'note': AdmonitionType.NOTE,
@@ -54,7 +53,7 @@ class RstParser(BaseParser):
         'versionadded': AdmonitionType.VERSIONADDED,
         'versionchanged': AdmonitionType.VERSIONCHANGED,
     }
-    
+
     # Cross-reference patterns
     CROSS_REF_PATTERNS = [
         (r':ref:`([^`]+)`', CrossRefType.REF),
@@ -69,7 +68,7 @@ class RstParser(BaseParser):
         (r':data:`([^`]+)`', CrossRefType.DATA),
         (r':exc:`([^`]+)`', CrossRefType.EXC),
     ]
-    
+
     # Field list fields (common in docstrings)
     FIELD_NAMES = [
         'param', 'parameter', 'arg', 'argument',
@@ -81,22 +80,22 @@ class RstParser(BaseParser):
         'todo', 'deprecated', 'versionadded', 'versionchanged',
         'args', 'kwargs', 'keyword', 'keywords',
     ]
-    
-    def __init__(self, options: Optional[dict[str, Any]] = None):
+
+    def __init__(self, options: dict[str, Any] | None = None):
         super().__init__(options)
         self.quality_scorer = QualityScorer()
         self._current_line = 0
         self._lines: list[str] = []
         self._substitutions: dict[str, str] = {}
-    
+
     @property
     def format_name(self) -> str:
         return 'restructuredtext'
-    
+
     @property
     def supported_extensions(self) -> list[str]:
         return ['.rst', '.rest']
-    
+
     def _detect_format(self, content: str) -> bool:
         """Detect if content is RST."""
         rst_indicators = [
@@ -105,26 +104,23 @@ class RstParser(BaseParser):
             r':\w+:`[^`]+`',  # Cross-references
             r'\.\.\s+_`[^`]+`:',  # Targets
         ]
-        for pattern in rst_indicators:
-            if re.search(pattern, content):
-                return True
-        return False
-    
+        return any(re.search(pattern, content) for pattern in rst_indicators)
+
     def _parse_content(self, content: str, source_path: str) -> Document:
         """Parse RST content into Document."""
         self._lines = content.split('\n')
         self._current_line = 0
         self._substitutions = {}
-        
+
         document = Document(
             title='',
             format='rst',
             source_path=source_path,
         )
-        
+
         # First pass: collect substitutions
         self._collect_substitutions()
-        
+
         # Second pass: parse content
         self._current_line = 0
         while self._current_line < len(self._lines):
@@ -132,7 +128,7 @@ class RstParser(BaseParser):
             if block:
                 document.blocks.append(block)
             self._current_line += 1
-        
+
         # Extract title from first heading
         for block in document.blocks:
             if block.type == ContentBlockType.HEADING:
@@ -140,15 +136,15 @@ class RstParser(BaseParser):
                 if heading_data:
                     document.title = heading_data.text
                     break
-        
+
         # Store substitutions
         document.substitutions = self._substitutions.copy()
-        
+
         # Extract specialized content
         self._extract_specialized_content(document)
-        
+
         return document
-    
+
     def _collect_substitutions(self):
         """First pass: collect all substitution definitions."""
         pattern = re.compile(r'^\.\.\s+\|([^|]+)\|\s+replace::\s*(.+)$')
@@ -158,133 +154,133 @@ class RstParser(BaseParser):
                 name = match.group(1).strip()
                 value = match.group(2).strip()
                 self._substitutions[name] = value
-    
-    def _parse_block(self) -> Optional[ContentBlock]:
+
+    def _parse_block(self) -> ContentBlock | None:
         """Parse a single block at current position."""
         line = self._current_line
         if line >= len(self._lines):
             return None
-        
+
         current = self._lines[line]
         stripped = current.strip()
-        
+
         # Skip empty lines
         if not stripped:
             return None
-        
+
         # Skip comments
         if stripped.startswith('.. ') and '::' not in stripped and not stripped.startswith('.. |'):
             # Check if it's a comment
             next_words = stripped[3:].split()
-            if not next_words or next_words[0] not in self.FIELD_NAMES + list(self.ADMONITION_DIRECTIVES.keys()):
-                # Could be a comment, check for explicit comment or just text
-                if not any(c.isalpha() for c in stripped[3:]):
-                    return None
-        
+            if (
+                not next_words or next_words[0] not in self.FIELD_NAMES + list(self.ADMONITION_DIRECTIVES.keys())
+            ) and not any(c.isalpha() for c in stripped[3:]):
+                return None
+
         # Header (underline style)
         if self._is_header(line):
             return self._parse_header()
-        
+
         # Directive
         if stripped.startswith('.. '):
             return self._parse_directive()
-        
+
         # Definition list
         if self._is_definition_list(line):
             return self._parse_definition_list()
-        
+
         # Field list
         if self._is_field_list(line):
             return self._parse_field_list()
-        
+
         # Bullet list
         if stripped.startswith(('- ', '* ', '+ ')):
             return self._parse_bullet_list()
-        
+
         # Numbered list
         if re.match(r'^\d+\.\s', stripped):
             return self._parse_numbered_list()
-        
+
         # Paragraph (default)
         return self._parse_paragraph()
-    
+
     def _is_header(self, line: int) -> bool:
         """Check if current line is a header (has underline)."""
         if line + 1 >= len(self._lines):
             return False
-        
+
         current = self._lines[line].strip()
         next_line = self._lines[line + 1].strip()
-        
+
         if not current or not next_line:
             return False
-        
+
         # Check if next line is all same character and a valid underline char
         if len(set(next_line)) != 1:
             return False
-        
+
         char = next_line[0]
         if char not in self.HEADER_CHARS:
             return False
-        
+
         # Underline should be roughly same length as text
         return len(next_line) >= len(current) - 2
-    
+
     def _parse_header(self) -> ContentBlock:
         """Parse a header with underline."""
         text = self._lines[self._current_line].strip()
         underline = self._lines[self._current_line + 1].strip()
-        
+
         char = underline[0]
         level = self.HEADER_CHARS.index(char) + 1 if char in self.HEADER_CHARS else 1
-        
+
         # Create anchor ID
         anchor = text.lower().replace(' ', '-').replace('_', '-')
         anchor = re.sub(r'[^a-z0-9-]', '', anchor)
-        
+
         heading = Heading(
             level=level,
             text=text,
             id=anchor,
             source_line=self._current_line + 1,
         )
-        
+
         # Skip the underline line
         self._current_line += 1
-        
+
         return ContentBlock(
             type=ContentBlockType.HEADING,
             content=text,
             metadata={'heading_data': heading},
             source_line=self._current_line,
         )
-    
+
     def _parse_directive(self) -> ContentBlock:
         """Parse a directive block."""
         line = self._current_line
         current = self._lines[line].strip()
-        
+
         # Extract directive name
         match = re.match(r'^\.\.\s+([\w\-]+)::\s*(.*)$', current)
         if not match:
             # Could be a comment or something else
             return self._parse_paragraph()
-        
+
         directive_name = match.group(1)
         argument = match.group(2)
-        
+
         # Collect directive content (indented lines)
         content_lines = []
         self._current_line += 1
-        
+
         while self._current_line < len(self._lines):
             current_line = self._lines[self._current_line]
-            
+
             # Check for end of directive (non-indented line or new directive)
             if current_line.strip() and not current_line.startswith(' '):
                 self._current_line -= 1  # Back up, this line belongs to next block
                 break
-            
+
             # Collect content (remove common indentation)
             if current_line.startswith('   '):
                 content_lines.append(current_line[3:])
@@ -296,11 +292,11 @@ class RstParser(BaseParser):
                 content_lines.append(current_line)
             else:
                 content_lines.append('')
-            
+
             self._current_line += 1
-        
+
         content = '\n'.join(content_lines).strip()
-        
+
         # Route to specific directive handler
         if directive_name in self.ADMONITION_DIRECTIVES:
             return self._parse_admonition_directive(
@@ -331,16 +327,16 @@ class RstParser(BaseParser):
                 metadata={'directive_name': directive_name, 'argument': argument},
                 source_line=line + 1,
             )
-    
-    def _parse_admonition_directive(self, name: str, argument: str, 
+
+    def _parse_admonition_directive(self, name: str, argument: str,
                                     content: str, line: int) -> ContentBlock:
         """Parse an admonition directive (note, warning, etc.)."""
         admonition_type = self.ADMONITION_DIRECTIVES.get(name, AdmonitionType.NOTE)
-        
+
         full_content = argument
         if content:
             full_content += '\n' + content if full_content else content
-        
+
         return ContentBlock(
             type=ContentBlockType.ADMONITION,
             content=full_content,
@@ -350,19 +346,19 @@ class RstParser(BaseParser):
             },
             source_line=line,
         )
-    
+
     def _parse_code_block_directive(self, language: str, content: str, line: int) -> ContentBlock:
         """Parse a code-block directive."""
         lang = language.strip() or 'text'
-        
+
         # Score the code
         quality = self.quality_scorer.score_code_block(content, lang)
         detected_lang, confidence = self.quality_scorer.detect_language(content)
-        
+
         # Use detected language if none specified and confidence is high
         if lang == 'text' and confidence > 0.7:
             lang = detected_lang
-        
+
         code_block = CodeBlock(
             code=content,
             language=lang,
@@ -370,7 +366,7 @@ class RstParser(BaseParser):
             confidence=confidence,
             source_line=line,
         )
-        
+
         return ContentBlock(
             type=ContentBlockType.CODE_BLOCK,
             content=content,
@@ -381,7 +377,7 @@ class RstParser(BaseParser):
             source_line=line,
             quality_score=quality,
         )
-    
+
     def _parse_table_directive(self, caption: str, content: str, line: int) -> ContentBlock:
         """Parse a table directive (simple or grid table)."""
         # Try to detect table type from content
@@ -389,9 +385,9 @@ class RstParser(BaseParser):
             table = self._parse_grid_table(content, caption, line)
         else:
             table = self._parse_simple_table(content, caption, line)
-        
+
         quality = self.quality_scorer.score_table(table)
-        
+
         return ContentBlock(
             type=ContentBlockType.TABLE,
             content=f"[Table: {caption}]" if caption else "[Table]",
@@ -401,24 +397,27 @@ class RstParser(BaseParser):
             source_line=line,
             quality_score=quality,
         )
-    
-    def _parse_simple_table(self, content: str, caption: Optional[str], 
+
+    def _parse_simple_table(self, content: str, caption: str | None,
                            line: int) -> Table:
         """Parse a simple RST table (space-separated columns with = or - separators)."""
         lines = content.split('\n')
         rows = []
         headers = None
         separator_indices = []
-        
+
         # Find separator lines (=== or ---)
         for i, line_text in enumerate(lines):
             stripped = line_text.strip()
             # Match separator lines that contain = or - but no alphanumeric chars
-            if stripped and re.match(r'^[\s=-]+$', stripped) and any(c in stripped for c in '=-'):
-                # Additional check: must have multiple = or - in a row (not just one)
-                if re.search(r'={3,}|-{3,}', stripped):
-                    separator_indices.append(i)
-        
+            if (
+                stripped
+                and re.match(r'^[\s=-]+$', stripped)
+                and any(c in stripped for c in '=-')
+                and re.search(r'={3,}|-{3,}', stripped)
+            ):
+                separator_indices.append(i)
+
         # Determine column boundaries from first separator
         col_boundaries = []
         if separator_indices:
@@ -437,18 +436,18 @@ class RstParser(BaseParser):
                         in_sep = False
             if not in_sep:
                 col_boundaries.append((start, len(sep_line)))
-        
+
         # Parse data rows using column boundaries or whitespace splitting
         for i, line_text in enumerate(lines):
             stripped = line_text.strip()
-            
+
             # Skip separator lines (handle both simple and grid table separators)
             if re.match(r'^[\s=-]+$', stripped) and any(c in stripped for c in '=-'):
                 continue
-            
+
             if not stripped:
                 continue
-            
+
             if '|' in line_text:
                 # Pipe-delimited format
                 cells = [cell.strip() for cell in line_text.split('|')]
@@ -471,25 +470,25 @@ class RstParser(BaseParser):
                 cells = [c for c in cells if c]
                 if cells:
                     rows.append(cells)
-        
+
         # Determine headers from separator positions
         # In RST simple tables: separator, header, separator, data...
         if separator_indices and rows:
             first_sep = separator_indices[0]
-            
+
             # Find first row index (first non-separator line after first separator)
             first_row_idx = None
             for i in range(len(lines)):
                 if i > first_sep and lines[i].strip():
                     # Check if this is a separator
                     stripped = lines[i].strip()
-                    is_sep = bool(re.match(r'^[\s=-]+$', stripped) and 
-                                  any(c in stripped for c in '=-') and 
+                    is_sep = bool(re.match(r'^[\s=-]+$', stripped) and
+                                  any(c in stripped for c in '=-') and
                                   re.search(r'={3,}|-{3,}', stripped))
                     if not is_sep:
                         first_row_idx = i
                         break
-            
+
             # Check if there's a separator after the first row (indicating header)
             if first_row_idx is not None:
                 second_sep = None
@@ -497,12 +496,12 @@ class RstParser(BaseParser):
                     if sep_idx > first_row_idx:
                         second_sep = sep_idx
                         break
-                
+
                 if second_sep is not None:
                     # First row is headers
                     headers = rows[0]
                     rows = rows[1:]
-        
+
         return Table(
             rows=rows,
             headers=headers,
@@ -510,26 +509,26 @@ class RstParser(BaseParser):
             source_format='simple',
             source_line=line,
         )
-    
-    def _parse_grid_table(self, content: str, caption: Optional[str],
+
+    def _parse_grid_table(self, content: str, caption: str | None,
                          line: int) -> Table:
         """Parse a grid RST table."""
         lines = content.split('\n')
         rows = []
         headers = None
         in_header = False
-        
+
         for i, line_text in enumerate(lines):
             # Check for header separator (+=...=+)
             if re.match(r'^\+[=+]+\+$', line_text.strip()):
                 in_header = True
                 continue
-            
+
             # Check for row separator (+-...-+)
             if re.match(r'^\+[-+]+\+$', line_text.strip()):
                 in_header = False
                 continue
-            
+
             # Parse row
             if '|' in line_text:
                 cells = []
@@ -543,7 +542,7 @@ class RstParser(BaseParser):
                         headers = cells
                     else:
                         rows.append(cells)
-        
+
         return Table(
             rows=rows,
             headers=headers,
@@ -551,14 +550,14 @@ class RstParser(BaseParser):
             source_format='grid',
             source_line=line,
         )
-    
-    def _parse_list_table_directive(self, caption: str, content: str, 
+
+    def _parse_list_table_directive(self, caption: str, content: str,
                                     line: int) -> ContentBlock:
         """Parse a list-table directive."""
         lines = content.split('\n')
         rows = []
         headers = None
-        
+
         # Check for :header-rows: option
         header_rows = 0
         for line_text in lines:
@@ -566,31 +565,31 @@ class RstParser(BaseParser):
             if match:
                 header_rows = int(match.group(1))
                 break
-        
+
         # Parse rows (lines starting with * )
         current_row = []
         for line_text in lines:
             stripped = line_text.strip()
-            
+
             # New row
             if re.match(r'^\*\s+-', stripped):
                 if current_row:
                     rows.append(current_row)
                 current_row = []
-            
+
             # Cell content
             if stripped.startswith('- '):
                 cell = stripped[2:].strip()
                 current_row.append(cell)
-        
+
         if current_row:
             rows.append(current_row)
-        
+
         # Extract headers
         if header_rows > 0 and rows:
             headers = rows[0]
             rows = rows[header_rows:]
-        
+
         table = Table(
             rows=rows,
             headers=headers,
@@ -598,9 +597,9 @@ class RstParser(BaseParser):
             source_format='list-table',
             source_line=line,
         )
-        
+
         quality = self.quality_scorer.score_table(table)
-        
+
         return ContentBlock(
             type=ContentBlockType.TABLE,
             content=f"[Table: {caption}]" if caption else "[Table]",
@@ -608,41 +607,41 @@ class RstParser(BaseParser):
             source_line=line,
             quality_score=quality,
         )
-    
+
     def _parse_toctree_directive(self, content: str, line: int) -> ContentBlock:
         """Parse a toctree directive."""
         entries = []
-        
+
         for line_text in content.split('\n'):
             stripped = line_text.strip()
             # Entries are simple lines or :hidden: etc options
             if stripped and not stripped.startswith(':'):
                 entries.append(stripped)
-        
+
         return ContentBlock(
             type=ContentBlockType.TOC_TREE,
             content=f"ToC: {', '.join(entries[:5])}..." if len(entries) > 5 else f"ToC: {', '.join(entries)}",
             metadata={'entries': entries},
             source_line=line,
         )
-    
+
     def _parse_image_directive(self, argument: str, content: str, line: int) -> ContentBlock:
         """Parse an image or figure directive."""
         # Extract options from content
         alt_text = None
         width = None
         height = None
-        
+
         for line_text in content.split('\n'):
             stripped = line_text.strip()
-            
+
             if stripped.startswith(':alt:'):
                 alt_text = stripped[5:].strip()
             elif stripped.startswith(':width:'):
                 width = stripped[7:].strip()
             elif stripped.startswith(':height:'):
                 height = stripped[8:].strip()
-        
+
         image = Image(
             source=argument,
             alt_text=alt_text,
@@ -650,54 +649,54 @@ class RstParser(BaseParser):
             height=int(height) if height and height.isdigit() else None,
             source_line=line,
         )
-        
+
         return ContentBlock(
             type=ContentBlockType.IMAGE,
             content=argument,
             metadata={'image_data': image},
             source_line=line,
         )
-    
+
     def _is_definition_list(self, line: int) -> bool:
         """Check if current line starts a definition list."""
         if line + 1 >= len(self._lines):
             return False
-        
+
         current = self._lines[line].strip()
         next_line = self._lines[line + 1].strip()
-        
+
         # Definition list: term followed by indented definition starting with :
         return next_line.startswith(': ') or (next_line and next_line[0].isspace() and ':' in current)
-    
+
     def _parse_definition_list(self) -> ContentBlock:
         """Parse a definition list."""
         items = []
         start_line = self._current_line
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             stripped = line.strip()
-            
+
             # End of definition list
             if not stripped:
                 self._current_line += 1
                 continue
-            
+
             if not line.startswith(' ') and items:
                 # New non-indented item, end of list
                 self._current_line -= 1
                 break
-            
+
             # Check for term : classifier pattern (RST standard)
             match = re.match(r'^([^:]+)\s+:\s+(.+)$', stripped)
             if match:
                 term = match.group(1).strip()
                 classifier = match.group(2).strip()
-                
+
                 # Get definition (next indented lines)
                 self._current_line += 1
                 def_lines = []
-                
+
                 while self._current_line < len(self._lines):
                     def_line = self._lines[self._current_line]
                     if def_line.strip() and not def_line.startswith(' '):
@@ -709,9 +708,9 @@ class RstParser(BaseParser):
                     elif def_line.startswith(' '):
                         def_lines.append(def_line[1:])
                     self._current_line += 1
-                
+
                 definition = ' '.join(def_lines).strip()
-                
+
                 items.append(DefinitionItem(
                     term=term,
                     definition=definition,
@@ -720,49 +719,49 @@ class RstParser(BaseParser):
                 ))
             else:
                 self._current_line += 1
-        
+
         return ContentBlock(
             type=ContentBlockType.DEFINITION_LIST,
             content=f"{len(items)} definitions",
             metadata={'items': items},
             source_line=start_line + 1,
         )
-    
+
     def _is_field_list(self, line: int) -> bool:
         """Check if current line starts a field list."""
         current = self._lines[line].strip()
-        
+
         # Field list: :fieldname: or :fieldname arg:
         return re.match(r'^:(\w+)(\s+\w+)?:', current) is not None
-    
+
     def _parse_field_list(self) -> ContentBlock:
         """Parse a field list."""
         fields = []
         start_line = self._current_line
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             stripped = line.strip()
-            
+
             # End of field list
             if not stripped:
                 self._current_line += 1
                 continue
-            
+
             if not line.startswith(':') and fields:
                 break
-            
+
             # Parse field
             match = re.match(r'^:(\w+)(?:\s+(\S+))?:(.*)$', stripped)
             if match:
                 name = match.group(1)
                 arg = match.group(2)
                 content = match.group(3).strip()
-                
+
                 # Multi-line content (indented lines following)
                 self._current_line += 1
                 content_lines = [content] if content else []
-                
+
                 while self._current_line < len(self._lines):
                     cont_line = self._lines[self._current_line]
                     if cont_line.strip() and not cont_line.startswith(' '):
@@ -774,9 +773,9 @@ class RstParser(BaseParser):
                     elif cont_line.startswith(' '):
                         content_lines.append(cont_line[1:])
                     self._current_line += 1
-                
+
                 full_content = ' '.join(content_lines).strip()
-                
+
                 fields.append(Field(
                     name=name,
                     arg=arg,
@@ -785,39 +784,39 @@ class RstParser(BaseParser):
                 ))
             else:
                 self._current_line += 1
-        
+
         # Back up one line if we broke on a non-field
         if self._current_line < len(self._lines) and not self._lines[self._current_line].strip().startswith(':'):
             self._current_line -= 1
-        
+
         return ContentBlock(
             type=ContentBlockType.FIELD_LIST,
             content=f"{len(fields)} fields",
             metadata={'fields': fields},
             source_line=start_line + 1,
         )
-    
+
     def _parse_bullet_list(self) -> ContentBlock:
         """Parse a bullet list."""
         items = []
         start_line = self._current_line
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             stripped = line.strip()
-            
+
             if not stripped:
                 self._current_line += 1
                 continue
-            
+
             if not stripped.startswith(('- ', '* ', '+ ')):
                 self._current_line -= 1
                 break
-            
+
             item_text = stripped[2:]
             items.append(item_text)
             self._current_line += 1
-        
+
         return ContentBlock(
             type=ContentBlockType.LIST,
             content=f"{len(items)} items",
@@ -827,28 +826,28 @@ class RstParser(BaseParser):
             },
             source_line=start_line + 1,
         )
-    
+
     def _parse_numbered_list(self) -> ContentBlock:
         """Parse a numbered list."""
         items = []
         start_line = self._current_line
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             stripped = line.strip()
-            
+
             if not stripped:
                 self._current_line += 1
                 continue
-            
+
             match = re.match(r'^\d+\.\s+(.+)$', stripped)
             if not match:
                 self._current_line -= 1
                 break
-            
+
             items.append(match.group(1))
             self._current_line += 1
-        
+
         return ContentBlock(
             type=ContentBlockType.LIST,
             content=f"{len(items)} items",
@@ -858,74 +857,74 @@ class RstParser(BaseParser):
             },
             source_line=start_line + 1,
         )
-    
+
     def _parse_paragraph(self) -> ContentBlock:
         """Parse a paragraph (default block type)."""
         lines = []
         start_line = self._current_line
-        
+
         while self._current_line < len(self._lines):
             line = self._lines[self._current_line]
             stripped = line.strip()
-            
+
             # End of paragraph
             if not stripped:
                 break
-            
+
             # Check for special constructs
             if stripped.startswith('.. ') or stripped.startswith(': '):
                 break
             if self._is_header(self._current_line):
                 break
-            
+
             lines.append(line)
             self._current_line += 1
-        
+
         raw_content = ' '.join(lines).strip()
-        
+
         # Extract cross-references from raw content before processing
         xrefs, ext_links = self._extract_xrefs_from_text(raw_content, start_line + 1)
-        
+
         # Process inline markup
         content = self._process_inline_markup(raw_content)
-        
+
         block = ContentBlock(
             type=ContentBlockType.PARAGRAPH,
             content=content,
             source_line=start_line + 1,
         )
-        
+
         # Store extracted references in metadata
         if xrefs or ext_links:
             block.metadata['cross_references'] = xrefs
             block.metadata['external_links'] = ext_links
-        
+
         return block
-    
+
     def _process_inline_markup(self, text: str) -> str:
         """Process inline RST markup."""
         # Bold: **text** or *text*
         text = re.sub(r'\*\*([^*]+)\*\*', r'**\1**', text)
-        
+
         # Italic: *text*
         text = re.sub(r'(?<!\*)\*([^*]+)\*(?!\*)', r'*\1*', text)
-        
+
         # Inline code: ``text``
         text = re.sub(r'``([^`]+)``', r'`\1`', text)
-        
+
         # Links: `text <url>`_ -> [text](url)
         text = re.sub(r'`([^<]+)<([^>]+)>`_', r'[\1](\2)', text)
-        
+
         # Cross-references: :type:`target` -> [target]
         for pattern, ref_type in self.CROSS_REF_PATTERNS:
             text = re.sub(pattern, r'[\1]', text)
-        
+
         # Substitutions: |name| -> value
         for name, value in self._substitutions.items():
             text = text.replace(f'|{name}|', value)
-        
+
         return text
-    
+
     def _extract_specialized_content(self, document: Document):
         """Extract specialized content lists from blocks."""
         for block in document.blocks:
@@ -934,19 +933,19 @@ class RstParser(BaseParser):
                 heading_data = block.metadata.get('heading_data')
                 if heading_data:
                     document.headings.append(heading_data)
-            
+
             # Extract code blocks
             elif block.type == ContentBlockType.CODE_BLOCK:
                 code_data = block.metadata.get('code_data')
                 if code_data:
                     document.code_blocks.append(code_data)
-            
+
             # Extract tables
             elif block.type == ContentBlockType.TABLE:
                 table_data = block.metadata.get('table_data')
                 if table_data:
                     document.tables.append(table_data)
-            
+
             # Extract cross-references from various sources
             elif block.type == ContentBlockType.CROSS_REFERENCE:
                 xref_data = block.metadata.get('xref_data')
@@ -955,31 +954,31 @@ class RstParser(BaseParser):
                         document.internal_links.append(xref_data)
                     else:
                         document.external_links.append(xref_data)
-            
+
             # Extract field lists
             elif block.type == ContentBlockType.FIELD_LIST:
                 fields = block.metadata.get('fields', [])
                 if fields:
                     document.field_lists.append(fields)
-            
+
             # Extract definition lists
             elif block.type == ContentBlockType.DEFINITION_LIST:
                 items = block.metadata.get('items', [])
                 if items:
                     document.definition_lists.append(items)
-            
+
             # Extract ToC trees
             elif block.type == ContentBlockType.TOC_TREE:
                 entries = block.metadata.get('entries', [])
                 if entries:
                     document.toc_trees.append(entries)
-            
+
             # Extract images
             elif block.type == ContentBlockType.IMAGE:
                 image_data = block.metadata.get('image_data')
                 if image_data:
                     document.images.append(image_data)
-            
+
             # Extract cross-references and links from paragraphs
             elif block.type == ContentBlockType.PARAGRAPH:
                 # Get pre-extracted references from metadata
@@ -987,12 +986,12 @@ class RstParser(BaseParser):
                 ext_links = block.metadata.get('external_links', [])
                 document.internal_links.extend(xrefs)
                 document.external_links.extend(ext_links)
-    
-    def _extract_xrefs_from_text(self, text: str, source_line: Optional[int]) -> tuple[list, list]:
+
+    def _extract_xrefs_from_text(self, text: str, source_line: int | None) -> tuple[list, list]:
         """Extract cross-references and links from text."""
         xrefs = []
         external_links = []
-        
+
         # Extract cross-references (:type:`target`)
         for pattern, ref_type in self.CROSS_REF_PATTERNS:
             for match in re.finditer(pattern, text):
@@ -1003,7 +1002,7 @@ class RstParser(BaseParser):
                     source_line=source_line,
                 )
                 xrefs.append(xref)
-        
+
         # Extract external links (`text <url>`_)
         for match in re.finditer(r'`([^<]+)<([^>]+)>`_', text):
             link_text = match.group(1).strip()
@@ -1015,5 +1014,5 @@ class RstParser(BaseParser):
                 source_line=source_line,
             )
             external_links.append(xref)
-        
+
         return xrefs, external_links
diff --git a/src/skill_seekers/cli/parsers/extractors/unified_structure.py b/src/skill_seekers/cli/parsers/extractors/unified_structure.py
index 2b81dcf..ed0e68e 100644
--- a/src/skill_seekers/cli/parsers/extractors/unified_structure.py
+++ b/src/skill_seekers/cli/parsers/extractors/unified_structure.py
@@ -7,8 +7,8 @@ with a consistent structure.
 """
 
 from dataclasses import dataclass, field
-from typing import Any, Optional
-from enum import Enum, auto
+from typing import Any
+from enum import Enum
 
 
 class ContentBlockType(Enum):
@@ -76,20 +76,20 @@ class Heading:
     """A document heading/section title."""
     level: int  # 1-6 for h1-h6, or 1+ for RST underline levels
     text: str
-    id: Optional[str] = None  # Anchor ID
-    source_line: Optional[int] = None
+    id: str | None = None  # Anchor ID
+    source_line: int | None = None
 
 
 @dataclass
 class CodeBlock:
     """A code block with metadata."""
     code: str
-    language: Optional[str] = None
-    quality_score: Optional[float] = None  # 0-10
-    confidence: Optional[float] = None  # Language detection confidence
-    is_valid: Optional[bool] = None  # Syntax validation result
+    language: str | None = None
+    quality_score: float | None = None  # 0-10
+    confidence: float | None = None  # Language detection confidence
+    is_valid: bool | None = None  # Syntax validation result
     validation_issues: list[str] = field(default_factory=list)
-    source_line: Optional[int] = None
+    source_line: int | None = None
     metadata: dict[str, Any] = field(default_factory=dict)
 
 
@@ -97,11 +97,11 @@ class CodeBlock:
 class Table:
     """A table with rows and cells."""
     rows: list[list[str]]  # 2D array of cell content
-    headers: Optional[list[str]] = None
-    caption: Optional[str] = None
-    col_widths: Optional[list[int]] = None
+    headers: list[str] | None = None
+    caption: str | None = None
+    col_widths: list[int] | None = None
     source_format: str = "unknown"  # 'simple', 'grid', 'list-table', 'markdown', 'pdf'
-    source_line: Optional[int] = None
+    source_line: int | None = None
     metadata: dict[str, Any] = field(default_factory=dict)
 
     @property
@@ -120,8 +120,8 @@ class CrossReference:
     """A cross-reference link."""
     ref_type: CrossRefType
     target: str  # Target ID, URL, or path
-    text: Optional[str] = None  # Display text (if different from target)
-    source_line: Optional[int] = None
+    text: str | None = None  # Display text (if different from target)
+    source_line: int | None = None
     resolved: bool = False  # Whether target was resolved
 
 
@@ -129,9 +129,9 @@ class CrossReference:
 class Field:
     """A field in a field list (RST :param:, :returns:, etc.)."""
     name: str  # Field name (e.g., 'param', 'returns', 'type')
-    arg: Optional[str] = None  # Field argument (e.g., parameter name)
+    arg: str | None = None  # Field argument (e.g., parameter name)
     content: str = ""  # Field content
-    source_line: Optional[int] = None
+    source_line: int | None = None
 
 
 @dataclass
@@ -139,19 +139,19 @@ class DefinitionItem:
     """A definition list item (term + definition)."""
     term: str
     definition: str
-    classifier: Optional[str] = None  # RST classifier (term : classifier)
-    source_line: Optional[int] = None
+    classifier: str | None = None  # RST classifier (term : classifier)
+    source_line: int | None = None
 
 
 @dataclass
 class Image:
     """An image reference or embedded image."""
     source: str  # URL, path, or base64 data
-    alt_text: Optional[str] = None
-    width: Optional[int] = None
-    height: Optional[int] = None
+    alt_text: str | None = None
+    width: int | None = None
+    height: int | None = None
     is_embedded: bool = False  # True if data is embedded
-    source_line: Optional[int] = None
+    source_line: int | None = None
 
 
 @dataclass
@@ -160,8 +160,8 @@ class ContentBlock:
     type: ContentBlockType
     content: str = ""
     metadata: dict[str, Any] = field(default_factory=dict)
-    source_line: Optional[int] = None
-    quality_score: Optional[float] = None  # 0-10
+    source_line: int | None = None
+    quality_score: float | None = None  # 0-10
 
     # Type-specific data (stored in metadata for flexibility)
     # For CODE_BLOCK: 'code_data' -> CodeBlock
@@ -183,71 +183,71 @@ class ExtractionStats:
     cross_references: int = 0
     images: int = 0
     warnings: list[str] = field(default_factory=list)
-    processing_time_ms: Optional[float] = None
+    processing_time_ms: float | None = None
 
 
 @dataclass
 class Document:
     """
     Unified document structure - output of ALL parsers.
-    
+
     This class provides a standardized representation of document content
     regardless of the source format (RST, Markdown, PDF, HTML).
     """
     title: str = ""
     format: str = ""  # 'markdown', 'rst', 'pdf', 'html', 'unknown'
     source_path: str = ""
-    
+
     # Core content as blocks
     blocks: list[ContentBlock] = field(default_factory=list)
-    
+
     # Navigation/Structure (derived from blocks for convenience)
     headings: list[Heading] = field(default_factory=list)
     sections: list[dict] = field(default_factory=list)  # Hierarchical structure
-    
+
     # References
     internal_links: list[CrossReference] = field(default_factory=list)
     external_links: list[CrossReference] = field(default_factory=list)
-    
+
     # Specialized content (also in blocks, but extracted for easy access)
     code_blocks: list[CodeBlock] = field(default_factory=list)
     tables: list[Table] = field(default_factory=list)
     images: list[Image] = field(default_factory=list)
-    
+
     # RST-specific (may be empty for other formats)
     field_lists: list[list[Field]] = field(default_factory=list)
     definition_lists: list[list[DefinitionItem]] = field(default_factory=list)
     substitutions: dict[str, str] = field(default_factory=dict)
     toc_trees: list[list[str]] = field(default_factory=list)
-    
+
     # Metadata
     meta: dict[str, Any] = field(default_factory=dict)
-    
+
     # Extraction info
     stats: ExtractionStats = field(default_factory=ExtractionStats)
-    
-    def to_markdown(self, options: Optional[dict] = None) -> str:
+
+    def to_markdown(self, options: dict | None = None) -> str:
         """
         Convert unified structure to markdown output.
-        
+
         Args:
             options: Optional formatting options
                 - include_toc: bool = False
                 - max_heading_level: int = 6
                 - code_block_style: str = 'fenced'  # or 'indented'
                 - table_style: str = 'github'  # or 'simple'
-                
+
         Returns:
             Markdown-formatted string
         """
         from .formatters import MarkdownFormatter
         formatter = MarkdownFormatter(options or {})
         return formatter.format(self)
-    
+
     def to_skill_format(self) -> dict[str, Any]:
         """
         Convert to skill-seekers internal format.
-        
+
         Returns:
             Dictionary compatible with existing skill-seekers pipelines
         """
@@ -292,7 +292,7 @@ class Document:
                 "headings": self.stats.headings,
             }
         }
-    
+
     def _extract_content_text(self) -> str:
         """Extract plain text content from paragraphs."""
         paragraphs = []
@@ -300,21 +300,21 @@ class Document:
             if block.type == ContentBlockType.PARAGRAPH:
                 paragraphs.append(block.content)
         return "\n\n".join(paragraphs)
-    
+
     def get_section_content(self, heading_text: str) -> list[ContentBlock]:
         """
         Get all content blocks under a specific section heading.
-        
+
         Args:
             heading_text: The section heading to find
-            
+
         Returns:
             List of ContentBlock objects in that section
         """
         result = []
         in_section = False
         section_level = None
-        
+
         for block in self.blocks:
             if block.type == ContentBlockType.HEADING:
                 heading_data = block.metadata.get('heading_data')
@@ -325,29 +325,29 @@ class Document:
                 elif in_section and heading_data.level <= section_level:
                     # New section at same or higher level
                     break
-            
+
             if in_section:
                 result.append(block)
-        
+
         return result
-    
+
     def find_blocks_by_type(self, block_type: ContentBlockType) -> list[ContentBlock]:
         """Find all blocks of a specific type."""
         return [b for b in self.blocks if b.type == block_type]
-    
+
     def find_code_by_language(self, language: str) -> list[CodeBlock]:
         """Find all code blocks in a specific language."""
         return [cb for cb in self.code_blocks if cb.language == language]
-    
+
     def find_tables_by_caption(self, pattern: str) -> list[Table]:
         """Find tables with captions matching a pattern."""
         import re
         return [t for t in self.tables if t.caption and re.search(pattern, t.caption, re.I)]
-    
+
     def get_api_summary(self) -> dict[str, Any]:
         """
         Extract API summary if this is API documentation.
-        
+
         Returns:
             Dictionary with 'properties', 'methods', 'signals', etc.
         """
@@ -355,7 +355,7 @@ class Document:
         properties_table = None
         methods_table = None
         signals_table = None
-        
+
         for table in self.tables:
             if table.caption:
                 cap_lower = table.caption.lower()
@@ -365,21 +365,21 @@ class Document:
                     methods_table = table
                 elif 'signal' in cap_lower:
                     signals_table = table
-        
+
         return {
             "properties": self._parse_api_table(properties_table) if properties_table else [],
             "methods": self._parse_api_table(methods_table) if methods_table else [],
             "signals": self._parse_api_table(signals_table) if signals_table else [],
         }
-    
-    def _parse_api_table(self, table: Optional[Table]) -> list[dict]:
+
+    def _parse_api_table(self, table: Table | None) -> list[dict]:
         """Parse an API table into structured data."""
         if not table or not table.rows:
             return []
-        
+
         results = []
         headers = table.headers or []
-        
+
         for row in table.rows:
             if len(row) >= 2:
                 item = {"name": row[0]}
@@ -387,25 +387,25 @@ class Document:
                     if i < len(row):
                         item[header.lower().replace(' ', '_')] = row[i]
                 results.append(item)
-        
+
         return results
 
 
 def merge_documents(docs: list[Document]) -> Document:
     """
     Merge multiple documents into one.
-    
+
     Useful for combining multiple source files into a single skill.
     """
     if not docs:
         return Document()
-    
+
     merged = Document(
         title=docs[0].title,
         format=docs[0].format,
         source_path="merged",
     )
-    
+
     for doc in docs:
         merged.blocks.extend(doc.blocks)
         merged.headings.extend(doc.headings)
@@ -418,12 +418,12 @@ def merge_documents(docs: list[Document]) -> Document:
         merged.definition_lists.extend(doc.definition_lists)
         merged.toc_trees.extend(doc.toc_trees)
         merged.meta.update(doc.meta)
-    
+
     # Merge stats
     merged.stats.total_blocks = sum(d.stats.total_blocks for d in docs)
     merged.stats.code_blocks = sum(d.stats.code_blocks for d in docs)
     merged.stats.tables = sum(d.stats.tables for d in docs)
     merged.stats.headings = sum(d.stats.headings for d in docs)
     merged.stats.cross_references = sum(d.stats.cross_references for d in docs)
-    
+
     return merged
diff --git a/src/skill_seekers/cli/pdf_scraper.py b/src/skill_seekers/cli/pdf_scraper.py
index c6cbfd2..e57657f 100644
--- a/src/skill_seekers/cli/pdf_scraper.py
+++ b/src/skill_seekers/cli/pdf_scraper.py
@@ -707,14 +707,14 @@ def main():
         # Note: Runs independently of workflow system (they complement each other)
         if getattr(args, "enhance_level", 0) > 0:
             # Traditional AI enhancement (API or LOCAL mode)
-            logger.info("\n" + "=" * 80)
-            logger.info("🤖 Traditional AI Enhancement")
-            logger.info("=" * 80)
+            print("\n" + "=" * 80)
+            print("🤖 Traditional AI Enhancement")
+            print("=" * 80)
             if workflow_executed:
-                logger.info(f"   Running after workflow: {workflow_name}")
-                logger.info("   (Workflow provides specialized analysis, enhancement provides general improvements)")
-            logger.info("   (Use --enhance-workflow for more control)")
-            logger.info("")
+                print(f"   Running after workflow: {workflow_name}")
+                print("   (Workflow provides specialized analysis, enhancement provides general improvements)")
+            print("   (Use --enhance-workflow for more control)")
+            print("")
             # Note: PDF scraper uses enhance_level instead of enhance/enhance_local
             # This is consistent with the new unified enhancement system
 
diff --git a/src/skill_seekers/cli/unified_enhancer.py b/src/skill_seekers/cli/unified_enhancer.py
index f8ec5c2..5cb7a26 100644
--- a/src/skill_seekers/cli/unified_enhancer.py
+++ b/src/skill_seekers/cli/unified_enhancer.py
@@ -25,7 +25,7 @@ import tempfile
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Literal
+from typing import Literal
 
 logger = logging.getLogger(__name__)
 
@@ -166,10 +166,7 @@ class UnifiedEnhancer:
             return items
 
         # Get appropriate prompt
-        if custom_prompt:
-            prompt_template = custom_prompt
-        else:
-            prompt_template = self._get_default_prompt(enhancement_type)
+        prompt_template = custom_prompt or self._get_default_prompt(enhancement_type)
 
         # Batch processing
         batch_size = (
diff --git a/src/skill_seekers/cli/unified_scraper.py b/src/skill_seekers/cli/unified_scraper.py
index af9a2e4..1f65203 100644
--- a/src/skill_seekers/cli/unified_scraper.py
+++ b/src/skill_seekers/cli/unified_scraper.py
@@ -571,7 +571,7 @@ class UnifiedScraper:
             if file_patterns:
                 logger.info(f"   File patterns: {', '.join(file_patterns)}")
 
-            results = analyze_codebase(
+            analyze_codebase(
                 directory=Path(local_path),
                 output_dir=temp_output,
                 depth=analysis_depth,
diff --git a/src/skill_seekers/mcp/tools/workflow_tools.py b/src/skill_seekers/mcp/tools/workflow_tools.py
index a2efe55..fb433ad 100644
--- a/src/skill_seekers/mcp/tools/workflow_tools.py
+++ b/src/skill_seekers/mcp/tools/workflow_tools.py
@@ -91,7 +91,7 @@ def _validate_yaml(text: str) -> dict:
 # ──────────────────────────────────────────────────────────────────────────────
 
 
-def list_workflows_tool(args: dict) -> list:
+def list_workflows_tool(_args: dict) -> list:
     """Return all workflows with name, description, and source."""
     result: list[dict[str, str]] = []
 
diff --git a/tests/test_create_integration_basic.py b/tests/test_create_integration_basic.py
index c1db129..45a6e1e 100644
--- a/tests/test_create_integration_basic.py
+++ b/tests/test_create_integration_basic.py
@@ -122,28 +122,28 @@ class TestCreateCommandArgvForwarding:
 
     def _make_args(self, **kwargs):
         import argparse
-        defaults = dict(
-            enhance_workflow=None,
-            enhance_stage=None,
-            var=None,
-            workflow_dry_run=False,
-            enhance_level=0,
-            output=None,
-            name=None,
-            description=None,
-            config=None,
-            api_key=None,
-            dry_run=False,
-            verbose=False,
-            quiet=False,
-            chunk_for_rag=False,
-            chunk_size=512,
-            chunk_overlap=50,
-            preset=None,
-            no_preserve_code_blocks=False,
-            no_preserve_paragraphs=False,
-            interactive_enhancement=False,
-        )
+        defaults = {
+            "enhance_workflow": None,
+            "enhance_stage": None,
+            "var": None,
+            "workflow_dry_run": False,
+            "enhance_level": 0,
+            "output": None,
+            "name": None,
+            "description": None,
+            "config": None,
+            "api_key": None,
+            "dry_run": False,
+            "verbose": False,
+            "quiet": False,
+            "chunk_for_rag": False,
+            "chunk_size": 512,
+            "chunk_overlap": 50,
+            "preset": None,
+            "no_preserve_code_blocks": False,
+            "no_preserve_paragraphs": False,
+            "interactive_enhancement": False,
+        }
         defaults.update(kwargs)
         return argparse.Namespace(**defaults)
 
diff --git a/tests/test_unified_parsers.py b/tests/test_unified_parsers.py
index 518dc84..7b7e1c4 100644
--- a/tests/test_unified_parsers.py
+++ b/tests/test_unified_parsers.py
@@ -86,7 +86,7 @@ Basic usage:
 .. code-block:: gdscript
 
     extends Node
-    
+
     func _ready():
         print("Hello, World!")
         position = Vector2(100, 100)
@@ -414,7 +414,7 @@ def calculate_average(numbers):
 
     def test_good_table_score(self):
         """Test quality score for good table."""
-        from skill_seekers.cli.parsers.extractors import QualityScorer, Table
+        from skill_seekers.cli.parsers.extractors import QualityScorer
 
         scorer = QualityScorer()
         good_table = Table(
diff --git a/tests/test_workflow_runner.py b/tests/test_workflow_runner.py
index 562030c..f547593 100644
--- a/tests/test_workflow_runner.py
+++ b/tests/test_workflow_runner.py
@@ -12,8 +12,7 @@ Covers:
 """
 
 import argparse
-import sys
-from unittest.mock import MagicMock, patch, call
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -186,7 +185,7 @@ class TestRunWorkflowsMultiple:
             m.workflow.description = "desc"
             m.workflow.stages = []
             # Track call order
-            m.run.side_effect = lambda *a, _n=wf_name, **kw: run_order.append(_n)
+            m.run.side_effect = lambda *_a, _n=wf_name, **_kw: run_order.append(_n)
             engines.append(m)
 
         with patch(
@@ -208,7 +207,7 @@ class TestRunWorkflowsMultiple:
         good_engine.workflow.description = "desc"
         good_engine.workflow.stages = []
 
-        def side_effect(name, **kwargs):
+        def side_effect(name, **_kwargs):
             if name == "bad-workflow":
                 raise FileNotFoundError("not found")
             return good_engine
@@ -341,9 +340,8 @@ class TestRunWorkflowsDryRun:
         with patch(
             "skill_seekers.cli.enhancement_workflow.WorkflowEngine",
             return_value=mock_engine,
-        ):
-            with pytest.raises(SystemExit) as exc:
-                run_workflows(args)
+        ), pytest.raises(SystemExit) as exc:
+            run_workflows(args)
 
         assert exc.value.code == 0
         mock_engine.preview.assert_called_once()
@@ -366,9 +364,8 @@ class TestRunWorkflowsDryRun:
         with patch(
             "skill_seekers.cli.enhancement_workflow.WorkflowEngine",
             side_effect=engines,
-        ):
-            with pytest.raises(SystemExit):
-                run_workflows(args)
+        ), pytest.raises(SystemExit):
+            run_workflows(args)
 
         for engine in engines:
             engine.preview.assert_called_once()
diff --git a/tests/test_workflow_tools_mcp.py b/tests/test_workflow_tools_mcp.py
index 286085b..e6c4f6d 100644
--- a/tests/test_workflow_tools_mcp.py
+++ b/tests/test_workflow_tools_mcp.py
@@ -9,7 +9,6 @@ Covers:
 """
 
 import textwrap
-from pathlib import Path
 from unittest.mock import patch
 
 import pytest
@@ -290,7 +289,7 @@ class TestDeleteWorkflowTool:
         wf.write_text(MINIMAL_YAML, encoding="utf-8")
 
         with _mock_bundled_names([]):
-            result = delete_workflow_tool({"name": "my-wf"})
+            delete_workflow_tool({"name": "my-wf"})
 
         assert not wf.exists()
 
diff --git a/tests/test_workflows_command.py b/tests/test_workflows_command.py
index ee8091f..23d6912 100644
--- a/tests/test_workflows_command.py
+++ b/tests/test_workflows_command.py
@@ -10,11 +10,9 @@ Covers:
 """
 
 import textwrap
-from pathlib import Path
 from unittest.mock import patch, MagicMock
 
 import pytest
-import yaml
 
 # Import the MODULE object (not just individual symbols) so we can patch it
 # directly via patch.object(). This survives any sys.modules manipulation by
@@ -168,9 +166,8 @@ class TestCmdCopy:
         assert dest.read_text(encoding="utf-8") == MINIMAL_YAML
 
     def test_copy_nonexistent(self, capsys, tmp_user_dir):
-        with _mock_bundled_text({}):
-            with _mock_bundled([]):
-                rc = cmd_copy(["ghost-workflow"])
+        with _mock_bundled_text({}), _mock_bundled([]):
+            rc = cmd_copy(["ghost-workflow"])
         assert rc == 1
         assert "not found" in capsys.readouterr().err.lower()
 
@@ -403,9 +400,8 @@ class TestMain:
         from skill_seekers.cli.workflows_command import main
 
         # tmp_user_dir is empty; mock bundled to return nothing
-        with _mock_bundled([]):
-            with pytest.raises(SystemExit) as exc:
-                main(["list"])
+        with _mock_bundled([]), pytest.raises(SystemExit) as exc:
+            main(["list"])
         assert exc.value.code == 0
 
     def test_main_validate_success(self, capsys, sample_yaml_file):
@@ -423,31 +419,27 @@ class TestMain:
         assert "name: test-workflow" in capsys.readouterr().out
 
     def test_main_show_not_found_exits_1(self, capsys, tmp_user_dir):
-        with patch.object(_wf_cmd, "_workflow_yaml_text", return_value=None):
-            with pytest.raises(SystemExit) as exc:
-                _wf_cmd.main(["show", "ghost"])
+        with patch.object(_wf_cmd, "_workflow_yaml_text", return_value=None), pytest.raises(SystemExit) as exc:
+            _wf_cmd.main(["show", "ghost"])
         assert exc.value.code == 1
 
     def test_main_copy_single(self, capsys, tmp_user_dir):
-        with _mock_bundled_text({"default": MINIMAL_YAML}):
-            with pytest.raises(SystemExit) as exc:
-                _wf_cmd.main(["copy", "default"])
+        with _mock_bundled_text({"default": MINIMAL_YAML}), pytest.raises(SystemExit) as exc:
+            _wf_cmd.main(["copy", "default"])
         assert exc.value.code == 0
         assert (tmp_user_dir / "default.yaml").exists()
 
     def test_main_copy_multiple(self, capsys, tmp_user_dir):
         texts = {"default": MINIMAL_YAML, "minimal": MINIMAL_YAML}
-        with _mock_bundled_text(texts):
-            with pytest.raises(SystemExit) as exc:
-                _wf_cmd.main(["copy", "default", "minimal"])
+        with _mock_bundled_text(texts), pytest.raises(SystemExit) as exc:
+            _wf_cmd.main(["copy", "default", "minimal"])
         assert exc.value.code == 0
         assert (tmp_user_dir / "default.yaml").exists()
         assert (tmp_user_dir / "minimal.yaml").exists()
 
     def test_main_copy_not_found_exits_1(self, capsys, tmp_user_dir):
-        with _mock_bundled_text({}), _mock_bundled([]):
-            with pytest.raises(SystemExit) as exc:
-                _wf_cmd.main(["copy", "ghost"])
+        with _mock_bundled_text({}), _mock_bundled([]), pytest.raises(SystemExit) as exc:
+            _wf_cmd.main(["copy", "ghost"])
         assert exc.value.code == 1
 
     def test_main_add_single_file(self, capsys, tmp_user_dir, sample_yaml_file):
@@ -484,32 +476,28 @@ class TestMain:
 
     def test_main_remove_single(self, capsys, tmp_user_dir):
         (tmp_user_dir / "my-wf.yaml").write_text(MINIMAL_YAML, encoding="utf-8")
-        with _mock_bundled([]):
-            with pytest.raises(SystemExit) as exc:
-                _wf_cmd.main(["remove", "my-wf"])
+        with _mock_bundled([]), pytest.raises(SystemExit) as exc:
+            _wf_cmd.main(["remove", "my-wf"])
         assert exc.value.code == 0
         assert not (tmp_user_dir / "my-wf.yaml").exists()
 
     def test_main_remove_multiple(self, capsys, tmp_user_dir):
         (tmp_user_dir / "wf-a.yaml").write_text(MINIMAL_YAML, encoding="utf-8")
         (tmp_user_dir / "wf-b.yaml").write_text(MINIMAL_YAML, encoding="utf-8")
-        with _mock_bundled([]):
-            with pytest.raises(SystemExit) as exc:
-                _wf_cmd.main(["remove", "wf-a", "wf-b"])
+        with _mock_bundled([]), pytest.raises(SystemExit) as exc:
+            _wf_cmd.main(["remove", "wf-a", "wf-b"])
         assert exc.value.code == 0
         assert not (tmp_user_dir / "wf-a.yaml").exists()
         assert not (tmp_user_dir / "wf-b.yaml").exists()
 
     def test_main_remove_bundled_refused(self, capsys, tmp_user_dir):
-        with _mock_bundled(["default"]):
-            with pytest.raises(SystemExit) as exc:
-                _wf_cmd.main(["remove", "default"])
+        with _mock_bundled(["default"]), pytest.raises(SystemExit) as exc:
+            _wf_cmd.main(["remove", "default"])
         assert exc.value.code == 1
 
     def test_main_remove_not_found_exits_1(self, capsys, tmp_user_dir):
-        with _mock_bundled([]):
-            with pytest.raises(SystemExit) as exc:
-                _wf_cmd.main(["remove", "ghost"])
+        with _mock_bundled([]), pytest.raises(SystemExit) as exc:
+            _wf_cmd.main(["remove", "ghost"])
         assert exc.value.code == 1