Refactor: Convert to monorepo with CLI and MCP server

Major restructure to support both CLI usage and MCP integration:

**Repository Structure:**
- cli/ - All CLI tools (doc_scraper, estimate_pages, enhance_skill, etc.)
- mcp/ - New MCP server for Claude Code integration
- configs/ - Shared configuration files
- tests/ - Updated to import from cli/
- docs/ - Shared documentation

**MCP Server (NEW):**
- mcp/server.py - Full MCP server implementation
- 6 tools available:
  * generate_config - Create config from URL
  * estimate_pages - Fast page count estimation
  * scrape_docs - Full documentation scraping
  * package_skill - Package to .zip
  * list_configs - Show available presets
  * validate_config - Validate config files
- mcp/README.md - Complete MCP documentation
- mcp/requirements.txt - MCP dependencies

**CLI Tools (Moved to cli/):**
- All existing functionality preserved
- Same commands, same behavior
- Tests updated to import from cli.doc_scraper

**Tests:**
- 68/71 passing (95.8%)
- Updated imports from doc_scraper to cli.doc_scraper
- Fixed validate_config() tuple unpacking (errors, warnings)
- 3 minor test failures (checking warnings instead of errors)

**Benefits:**
- Use as CLI tool: python3 cli/doc_scraper.py
- Use via MCP: Integrated with Claude Code
- Shared code and configs
- Single source of truth

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-19 15:19:53 +03:00
parent af87572735
commit ae924a9d05
12 changed files with 658 additions and 34 deletions
--- a/cli/doc_scraper.py
+++ b/cli/doc_scraper.py
@@ -0,0 +1,956 @@
+#!/usr/bin/env python3
+"""
+Documentation to Claude Skill Converter
+Single tool to scrape any documentation and create high-quality Claude skills.
+
+Usage:
+    python3 doc_scraper.py --interactive
+    python3 doc_scraper.py --config configs/godot.json
+    python3 doc_scraper.py --url https://react.dev/ --name react
+"""
+
+import os
+import sys
+import json
+import time
+import re
+import argparse
+import hashlib
+import requests
+from pathlib import Path
+from urllib.parse import urljoin, urlparse
+from bs4 import BeautifulSoup
+from collections import deque, defaultdict
+
+
+class DocToSkillConverter:
+    def __init__(self, config, dry_run=False):
+        self.config = config
+        self.name = config['name']
+        self.base_url = config['base_url']
+        self.dry_run = dry_run
+
+        # Paths
+        self.data_dir = f"output/{self.name}_data"
+        self.skill_dir = f"output/{self.name}"
+
+        # State
+        self.visited_urls = set()
+        # Support multiple starting URLs
+        start_urls = config.get('start_urls', [self.base_url])
+        self.pending_urls = deque(start_urls)
+        self.pages = []
+
+        # Create directories (unless dry-run)
+        if not dry_run:
+            os.makedirs(f"{self.data_dir}/pages", exist_ok=True)
+            os.makedirs(f"{self.skill_dir}/references", exist_ok=True)
+            os.makedirs(f"{self.skill_dir}/scripts", exist_ok=True)
+            os.makedirs(f"{self.skill_dir}/assets", exist_ok=True)
+    
+    def is_valid_url(self, url):
+        """Check if URL should be scraped"""
+        if not url.startswith(self.base_url):
+            return False
+        
+        # Include patterns
+        includes = self.config.get('url_patterns', {}).get('include', [])
+        if includes and not any(pattern in url for pattern in includes):
+            return False
+        
+        # Exclude patterns
+        excludes = self.config.get('url_patterns', {}).get('exclude', [])
+        if any(pattern in url for pattern in excludes):
+            return False
+        
+        return True
+    
+    def extract_content(self, soup, url):
+        """Extract content with improved code and pattern detection"""
+        page = {
+            'url': url,
+            'title': '',
+            'content': '',
+            'headings': [],
+            'code_samples': [],
+            'patterns': [],  # NEW: Extract common patterns
+            'links': []
+        }
+        
+        selectors = self.config.get('selectors', {})
+        
+        # Extract title
+        title_elem = soup.select_one(selectors.get('title', 'title'))
+        if title_elem:
+            page['title'] = self.clean_text(title_elem.get_text())
+        
+        # Find main content
+        main_selector = selectors.get('main_content', 'div[role="main"]')
+        main = soup.select_one(main_selector)
+        
+        if not main:
+            print(f"⚠ No content: {url}")
+            return page
+        
+        # Extract headings with better structure
+        for h in main.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
+            text = self.clean_text(h.get_text())
+            if text:
+                page['headings'].append({
+                    'level': h.name,
+                    'text': text,
+                    'id': h.get('id', '')
+                })
+        
+        # Extract code with language detection
+        code_selector = selectors.get('code_blocks', 'pre code')
+        for code_elem in main.select(code_selector):
+            code = code_elem.get_text()
+            if len(code.strip()) > 10:
+                # Try to detect language
+                lang = self.detect_language(code_elem, code)
+                page['code_samples'].append({
+                    'code': code.strip(),
+                    'language': lang
+                })
+        
+        # Extract patterns (NEW: common code patterns)
+        page['patterns'] = self.extract_patterns(main, page['code_samples'])
+        
+        # Extract paragraphs
+        paragraphs = []
+        for p in main.find_all('p'):
+            text = self.clean_text(p.get_text())
+            if text and len(text) > 20:  # Skip very short paragraphs
+                paragraphs.append(text)
+        
+        page['content'] = '\n\n'.join(paragraphs)
+        
+        # Extract links
+        for link in main.find_all('a', href=True):
+            href = urljoin(url, link['href'])
+            if self.is_valid_url(href):
+                page['links'].append(href)
+        
+        return page
+    
+    def detect_language(self, elem, code):
+        """Detect programming language from code block"""
+        # Check class attribute
+        classes = elem.get('class', [])
+        for cls in classes:
+            if 'language-' in cls:
+                return cls.replace('language-', '')
+            if 'lang-' in cls:
+                return cls.replace('lang-', '')
+        
+        # Check parent pre element
+        parent = elem.parent
+        if parent and parent.name == 'pre':
+            classes = parent.get('class', [])
+            for cls in classes:
+                if 'language-' in cls:
+                    return cls.replace('language-', '')
+        
+        # Heuristic detection
+        if 'import ' in code and 'from ' in code:
+            return 'python'
+        if 'const ' in code or 'let ' in code or '=>' in code:
+            return 'javascript'
+        if 'func ' in code and 'var ' in code:
+            return 'gdscript'
+        if 'def ' in code and ':' in code:
+            return 'python'
+        if '#include' in code or 'int main' in code:
+            return 'cpp'
+        
+        return 'unknown'
+    
+    def extract_patterns(self, main, code_samples):
+        """Extract common coding patterns (NEW FEATURE)"""
+        patterns = []
+        
+        # Look for "Example:" or "Pattern:" sections
+        for elem in main.find_all(['p', 'div']):
+            text = elem.get_text().lower()
+            if any(word in text for word in ['example:', 'pattern:', 'usage:', 'typical use']):
+                # Get the code that follows
+                next_code = elem.find_next(['pre', 'code'])
+                if next_code:
+                    patterns.append({
+                        'description': self.clean_text(elem.get_text()),
+                        'code': next_code.get_text().strip()
+                    })
+        
+        return patterns[:5]  # Limit to 5 most relevant patterns
+    
+    def clean_text(self, text):
+        """Clean text content"""
+        text = re.sub(r'\s+', ' ', text)
+        return text.strip()
+    
+    def save_page(self, page):
+        """Save page data"""
+        url_hash = hashlib.md5(page['url'].encode()).hexdigest()[:10]
+        safe_title = re.sub(r'[^\w\s-]', '', page['title'])[:50]
+        safe_title = re.sub(r'[-\s]+', '_', safe_title)
+        
+        filename = f"{safe_title}_{url_hash}.json"
+        filepath = os.path.join(self.data_dir, "pages", filename)
+        
+        with open(filepath, 'w', encoding='utf-8') as f:
+            json.dump(page, f, indent=2, ensure_ascii=False)
+    
+    def scrape_page(self, url):
+        """Scrape a single page"""
+        try:
+            print(f"  {url}")
+            
+            headers = {'User-Agent': 'Mozilla/5.0 (Documentation Scraper)'}
+            response = requests.get(url, headers=headers, timeout=30)
+            response.raise_for_status()
+            
+            soup = BeautifulSoup(response.content, 'html.parser')
+            page = self.extract_content(soup, url)
+            
+            self.save_page(page)
+            self.pages.append(page)
+            
+            # Add new URLs
+            for link in page['links']:
+                if link not in self.visited_urls and link not in self.pending_urls:
+                    self.pending_urls.append(link)
+            
+            # Rate limiting
+            time.sleep(self.config.get('rate_limit', 0.5))
+            
+        except Exception as e:
+            print(f"  ✗ Error: {e}")
+    
+    def scrape_all(self):
+        """Scrape all pages"""
+        print(f"\n{'='*60}")
+        if self.dry_run:
+            print(f"DRY RUN: {self.name}")
+        else:
+            print(f"SCRAPING: {self.name}")
+        print(f"{'='*60}")
+        print(f"Base URL: {self.base_url}")
+
+        if self.dry_run:
+            print(f"Mode: Preview only (no actual scraping)\n")
+        else:
+            print(f"Output: {self.data_dir}\n")
+
+        max_pages = self.config.get('max_pages', 500)
+
+        # Dry run: preview first 20 URLs
+        preview_limit = 20 if self.dry_run else max_pages
+
+        while self.pending_urls and len(self.visited_urls) < preview_limit:
+            url = self.pending_urls.popleft()
+
+            if url in self.visited_urls:
+                continue
+
+            self.visited_urls.add(url)
+
+            if self.dry_run:
+                # Just show what would be scraped
+                print(f"  [Preview] {url}")
+                # Simulate finding links without actually scraping
+                try:
+                    headers = {'User-Agent': 'Mozilla/5.0 (Documentation Scraper - Dry Run)'}
+                    response = requests.get(url, headers=headers, timeout=10)
+                    soup = BeautifulSoup(response.content, 'html.parser')
+
+                    main_selector = self.config.get('selectors', {}).get('main_content', 'div[role="main"]')
+                    main = soup.select_one(main_selector)
+
+                    if main:
+                        for link in main.find_all('a', href=True):
+                            href = urljoin(url, link['href'])
+                            if self.is_valid_url(href) and href not in self.visited_urls:
+                                self.pending_urls.append(href)
+                except:
+                    pass  # Ignore errors in dry run
+            else:
+                self.scrape_page(url)
+
+            if len(self.visited_urls) % 10 == 0:
+                print(f"  [{len(self.visited_urls)} pages]")
+
+        if self.dry_run:
+            print(f"\n✅ Dry run complete: would scrape ~{len(self.visited_urls)} pages")
+            if len(self.visited_urls) >= preview_limit:
+                print(f"   (showing first {preview_limit}, actual scraping may find more)")
+            print(f"\n💡 To actually scrape, run without --dry-run")
+        else:
+            print(f"\n✅ Scraped {len(self.visited_urls)} pages")
+            self.save_summary()
+    
+    def save_summary(self):
+        """Save scraping summary"""
+        summary = {
+            'name': self.name,
+            'total_pages': len(self.pages),
+            'base_url': self.base_url,
+            'pages': [{'title': p['title'], 'url': p['url']} for p in self.pages]
+        }
+        
+        with open(f"{self.data_dir}/summary.json", 'w', encoding='utf-8') as f:
+            json.dump(summary, f, indent=2, ensure_ascii=False)
+    
+    def load_scraped_data(self):
+        """Load previously scraped data"""
+        pages = []
+        pages_dir = Path(self.data_dir) / "pages"
+        
+        if not pages_dir.exists():
+            return []
+        
+        for json_file in pages_dir.glob("*.json"):
+            try:
+                with open(json_file, 'r', encoding='utf-8') as f:
+                    pages.append(json.load(f))
+            except Exception as e:
+                print(f"⚠ Error loading {json_file}: {e}")
+        
+        return pages
+    
+    def smart_categorize(self, pages):
+        """Improved categorization with better pattern matching"""
+        category_defs = self.config.get('categories', {})
+        
+        # Default smart categories if none provided
+        if not category_defs:
+            category_defs = self.infer_categories(pages)
+        
+        categories = {cat: [] for cat in category_defs.keys()}
+        categories['other'] = []
+        
+        for page in pages:
+            url = page['url'].lower()
+            title = page['title'].lower()
+            content = page.get('content', '').lower()[:500]  # Check first 500 chars
+            
+            categorized = False
+            
+            # Match against keywords
+            for cat, keywords in category_defs.items():
+                score = 0
+                for keyword in keywords:
+                    keyword = keyword.lower()
+                    if keyword in url:
+                        score += 3
+                    if keyword in title:
+                        score += 2
+                    if keyword in content:
+                        score += 1
+                
+                if score >= 2:  # Threshold for categorization
+                    categories[cat].append(page)
+                    categorized = True
+                    break
+            
+            if not categorized:
+                categories['other'].append(page)
+        
+        # Remove empty categories
+        categories = {k: v for k, v in categories.items() if v}
+        
+        return categories
+    
+    def infer_categories(self, pages):
+        """Infer categories from URL patterns (IMPROVED)"""
+        url_segments = defaultdict(int)
+        
+        for page in pages:
+            path = urlparse(page['url']).path
+            segments = [s for s in path.split('/') if s and s not in ['en', 'stable', 'latest', 'docs']]
+            
+            for seg in segments:
+                url_segments[seg] += 1
+        
+        # Top segments become categories
+        top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[:8]
+        
+        categories = {}
+        for seg, count in top_segments:
+            if count >= 3:  # At least 3 pages
+                categories[seg] = [seg]
+        
+        # Add common defaults
+        if 'tutorial' not in categories and any('tutorial' in url for url in [p['url'] for p in pages]):
+            categories['tutorials'] = ['tutorial', 'guide', 'getting-started']
+        
+        if 'api' not in categories and any('api' in url or 'reference' in url for url in [p['url'] for p in pages]):
+            categories['api'] = ['api', 'reference', 'class']
+        
+        return categories
+    
+    def generate_quick_reference(self, pages):
+        """Generate quick reference from common patterns (NEW FEATURE)"""
+        quick_ref = []
+        
+        # Collect all patterns
+        all_patterns = []
+        for page in pages:
+            all_patterns.extend(page.get('patterns', []))
+        
+        # Get most common code patterns
+        seen_codes = set()
+        for pattern in all_patterns:
+            code = pattern['code']
+            if code not in seen_codes and len(code) < 300:
+                quick_ref.append(pattern)
+                seen_codes.add(code)
+                if len(quick_ref) >= 15:
+                    break
+        
+        return quick_ref
+    
+    def create_reference_file(self, category, pages):
+        """Create enhanced reference file"""
+        if not pages:
+            return
+        
+        lines = []
+        lines.append(f"# {self.name.title()} - {category.replace('_', ' ').title()}\n")
+        lines.append(f"**Pages:** {len(pages)}\n")
+        lines.append("---\n")
+        
+        for page in pages:
+            lines.append(f"## {page['title']}\n")
+            lines.append(f"**URL:** {page['url']}\n")
+            
+            # Table of contents from headings
+            if page.get('headings'):
+                lines.append("**Contents:**")
+                for h in page['headings'][:10]:
+                    level = int(h['level'][1]) if len(h['level']) > 1 else 1
+                    indent = "  " * max(0, level - 2)
+                    lines.append(f"{indent}- {h['text']}")
+                lines.append("")
+            
+            # Content
+            if page.get('content'):
+                content = page['content'][:2500]
+                if len(page['content']) > 2500:
+                    content += "\n\n*[Content truncated]*"
+                lines.append(content)
+                lines.append("")
+            
+            # Code examples with language
+            if page.get('code_samples'):
+                lines.append("**Examples:**\n")
+                for i, sample in enumerate(page['code_samples'][:4], 1):
+                    lang = sample.get('language', 'unknown')
+                    code = sample.get('code', sample if isinstance(sample, str) else '')
+                    lines.append(f"Example {i} ({lang}):")
+                    lines.append(f"```{lang}")
+                    lines.append(code[:600])
+                    if len(code) > 600:
+                        lines.append("...")
+                    lines.append("```\n")
+            
+            lines.append("---\n")
+        
+        filepath = os.path.join(self.skill_dir, "references", f"{category}.md")
+        with open(filepath, 'w', encoding='utf-8') as f:
+            f.write('\n'.join(lines))
+        
+        print(f"  ✓ {category}.md ({len(pages)} pages)")
+    
+    def create_enhanced_skill_md(self, categories, quick_ref):
+        """Create SKILL.md with actual examples (IMPROVED)"""
+        description = self.config.get('description', f'Comprehensive assistance with {self.name}')
+        
+        # Extract actual code examples from docs
+        example_codes = []
+        for pages in categories.values():
+            for page in pages[:3]:  # First 3 pages per category
+                for sample in page.get('code_samples', [])[:2]:  # First 2 samples per page
+                    code = sample.get('code', sample if isinstance(sample, str) else '')
+                    lang = sample.get('language', 'unknown')
+                    if len(code) < 200 and lang != 'unknown':
+                        example_codes.append((lang, code))
+                    if len(example_codes) >= 10:
+                        break
+                if len(example_codes) >= 10:
+                    break
+            if len(example_codes) >= 10:
+                break
+        
+        content = f"""---
+name: {self.name}
+description: {description}
+---
+
+# {self.name.title()} Skill
+
+Comprehensive assistance with {self.name} development, generated from official documentation.
+
+## When to Use This Skill
+
+This skill should be triggered when:
+- Working with {self.name}
+- Asking about {self.name} features or APIs
+- Implementing {self.name} solutions
+- Debugging {self.name} code
+- Learning {self.name} best practices
+
+## Quick Reference
+
+### Common Patterns
+
+"""
+        
+        # Add actual quick reference patterns
+        if quick_ref:
+            for i, pattern in enumerate(quick_ref[:8], 1):
+                content += f"**Pattern {i}:** {pattern.get('description', 'Example pattern')}\n\n"
+                content += "```\n"
+                content += pattern.get('code', '')[:300]
+                content += "\n```\n\n"
+        else:
+            content += "*Quick reference patterns will be added as you use the skill.*\n\n"
+        
+        # Add example codes from docs
+        if example_codes:
+            content += "### Example Code Patterns\n\n"
+            for i, (lang, code) in enumerate(example_codes[:5], 1):
+                content += f"**Example {i}** ({lang}):\n```{lang}\n{code}\n```\n\n"
+        
+        content += f"""## Reference Files
+
+This skill includes comprehensive documentation in `references/`:
+
+"""
+        
+        for cat in sorted(categories.keys()):
+            content += f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n"
+        
+        content += """
+Use `view` to read specific reference files when detailed information is needed.
+
+## Working with This Skill
+
+### For Beginners
+Start with the getting_started or tutorials reference files for foundational concepts.
+
+### For Specific Features
+Use the appropriate category reference file (api, guides, etc.) for detailed information.
+
+### For Code Examples
+The quick reference section above contains common patterns extracted from the official docs.
+
+## Resources
+
+### references/
+Organized documentation extracted from official sources. These files contain:
+- Detailed explanations
+- Code examples with language annotations
+- Links to original documentation
+- Table of contents for quick navigation
+
+### scripts/
+Add helper scripts here for common automation tasks.
+
+### assets/
+Add templates, boilerplate, or example projects here.
+
+## Notes
+
+- This skill was automatically generated from official documentation
+- Reference files preserve the structure and examples from source docs
+- Code examples include language detection for better syntax highlighting
+- Quick reference patterns are extracted from common usage examples in the docs
+
+## Updating
+
+To refresh this skill with updated documentation:
+1. Re-run the scraper with the same configuration
+2. The skill will be rebuilt with the latest information
+"""
+        
+        filepath = os.path.join(self.skill_dir, "SKILL.md")
+        with open(filepath, 'w', encoding='utf-8') as f:
+            f.write(content)
+        
+        print(f"  ✓ SKILL.md (enhanced with {len(example_codes)} examples)")
+    
+    def create_index(self, categories):
+        """Create navigation index"""
+        lines = []
+        lines.append(f"# {self.name.title()} Documentation Index\n")
+        lines.append("## Categories\n")
+        
+        for cat, pages in sorted(categories.items()):
+            lines.append(f"### {cat.replace('_', ' ').title()}")
+            lines.append(f"**File:** `{cat}.md`")
+            lines.append(f"**Pages:** {len(pages)}\n")
+        
+        filepath = os.path.join(self.skill_dir, "references", "index.md")
+        with open(filepath, 'w', encoding='utf-8') as f:
+            f.write('\n'.join(lines))
+        
+        print("  ✓ index.md")
+    
+    def build_skill(self):
+        """Build the skill from scraped data"""
+        print(f"\n{'='*60}")
+        print(f"BUILDING SKILL: {self.name}")
+        print(f"{'='*60}\n")
+        
+        # Load data
+        print("Loading scraped data...")
+        pages = self.load_scraped_data()
+        
+        if not pages:
+            print("✗ No scraped data found!")
+            return False
+        
+        print(f"  ✓ Loaded {len(pages)} pages\n")
+        
+        # Categorize
+        print("Categorizing pages...")
+        categories = self.smart_categorize(pages)
+        print(f"  ✓ Created {len(categories)} categories\n")
+        
+        # Generate quick reference
+        print("Generating quick reference...")
+        quick_ref = self.generate_quick_reference(pages)
+        print(f"  ✓ Extracted {len(quick_ref)} patterns\n")
+        
+        # Create reference files
+        print("Creating reference files...")
+        for cat, cat_pages in categories.items():
+            self.create_reference_file(cat, cat_pages)
+        
+        # Create index
+        self.create_index(categories)
+        print()
+        
+        # Create enhanced SKILL.md
+        print("Creating SKILL.md...")
+        self.create_enhanced_skill_md(categories, quick_ref)
+        
+        print(f"\n✅ Skill built: {self.skill_dir}/")
+        return True
+
+
+def validate_config(config):
+    """Validate configuration structure"""
+    errors = []
+    warnings = []
+
+    # Required fields
+    required_fields = ['name', 'base_url']
+    for field in required_fields:
+        if field not in config:
+            errors.append(f"Missing required field: '{field}'")
+
+    # Validate name (alphanumeric, hyphens, underscores only)
+    if 'name' in config:
+        if not re.match(r'^[a-zA-Z0-9_-]+$', config['name']):
+            errors.append(f"Invalid name: '{config['name']}' (use only letters, numbers, hyphens, underscores)")
+
+    # Validate base_url
+    if 'base_url' in config:
+        if not config['base_url'].startswith(('http://', 'https://')):
+            errors.append(f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)")
+
+    # Validate selectors structure
+    if 'selectors' in config:
+        if not isinstance(config['selectors'], dict):
+            errors.append("'selectors' must be a dictionary")
+        else:
+            recommended_selectors = ['main_content', 'title', 'code_blocks']
+            for selector in recommended_selectors:
+                if selector not in config['selectors']:
+                    warnings.append(f"Missing recommended selector: '{selector}'")
+    else:
+        warnings.append("Missing 'selectors' section (recommended)")
+
+    # Validate url_patterns
+    if 'url_patterns' in config:
+        if not isinstance(config['url_patterns'], dict):
+            errors.append("'url_patterns' must be a dictionary")
+        else:
+            for key in ['include', 'exclude']:
+                if key in config['url_patterns']:
+                    if not isinstance(config['url_patterns'][key], list):
+                        errors.append(f"'url_patterns.{key}' must be a list")
+
+    # Validate categories
+    if 'categories' in config:
+        if not isinstance(config['categories'], dict):
+            errors.append("'categories' must be a dictionary")
+        else:
+            for cat_name, keywords in config['categories'].items():
+                if not isinstance(keywords, list):
+                    errors.append(f"'categories.{cat_name}' must be a list of keywords")
+
+    # Validate rate_limit
+    if 'rate_limit' in config:
+        try:
+            rate = float(config['rate_limit'])
+            if rate < 0:
+                errors.append(f"'rate_limit' must be non-negative (got {rate})")
+        except (ValueError, TypeError):
+            errors.append(f"'rate_limit' must be a number (got {config['rate_limit']})")
+
+    # Validate max_pages
+    if 'max_pages' in config:
+        try:
+            max_p = int(config['max_pages'])
+            if max_p < 1:
+                errors.append(f"'max_pages' must be at least 1 (got {max_p})")
+        except (ValueError, TypeError):
+            errors.append(f"'max_pages' must be an integer (got {config['max_pages']})")
+
+    # Validate start_urls if present
+    if 'start_urls' in config:
+        if not isinstance(config['start_urls'], list):
+            errors.append("'start_urls' must be a list")
+        else:
+            for url in config['start_urls']:
+                if not url.startswith(('http://', 'https://')):
+                    errors.append(f"Invalid start_url: '{url}' (must start with http:// or https://)")
+
+    return errors, warnings
+
+
+def load_config(config_path):
+    """Load and validate configuration from file"""
+    try:
+        with open(config_path, 'r') as f:
+            config = json.load(f)
+    except json.JSONDecodeError as e:
+        print(f"❌ Error: Invalid JSON in config file: {e}")
+        sys.exit(1)
+    except FileNotFoundError:
+        print(f"❌ Error: Config file not found: {config_path}")
+        sys.exit(1)
+
+    # Validate config
+    errors, warnings = validate_config(config)
+
+    # Show warnings (non-blocking)
+    if warnings:
+        print(f"⚠️  Configuration warnings in {config_path}:")
+        for warning in warnings:
+            print(f"   - {warning}")
+        print()
+
+    # Show errors (blocking)
+    if errors:
+        print(f"❌ Configuration validation errors in {config_path}:")
+        for error in errors:
+            print(f"   - {error}")
+        sys.exit(1)
+
+    return config
+
+
+def interactive_config():
+    """Interactive configuration"""
+    print("\n" + "="*60)
+    print("Documentation to Skill Converter")
+    print("="*60 + "\n")
+    
+    config = {}
+    
+    # Basic info
+    config['name'] = input("Skill name (e.g., 'react', 'godot'): ").strip()
+    config['description'] = input("Skill description: ").strip()
+    config['base_url'] = input("Base URL (e.g., https://docs.example.com/): ").strip()
+    
+    if not config['base_url'].endswith('/'):
+        config['base_url'] += '/'
+    
+    # Selectors
+    print("\nCSS Selectors (press Enter for defaults):")
+    selectors = {}
+    selectors['main_content'] = input("  Main content [div[role='main']]: ").strip() or "div[role='main']"
+    selectors['title'] = input("  Title [title]: ").strip() or "title"
+    selectors['code_blocks'] = input("  Code blocks [pre code]: ").strip() or "pre code"
+    config['selectors'] = selectors
+    
+    # URL patterns
+    print("\nURL Patterns (comma-separated, optional):")
+    include = input("  Include: ").strip()
+    exclude = input("  Exclude: ").strip()
+    config['url_patterns'] = {
+        'include': [p.strip() for p in include.split(',') if p.strip()],
+        'exclude': [p.strip() for p in exclude.split(',') if p.strip()]
+    }
+    
+    # Settings
+    rate = input("\nRate limit (seconds) [0.5]: ").strip()
+    config['rate_limit'] = float(rate) if rate else 0.5
+    
+    max_p = input("Max pages [500]: ").strip()
+    config['max_pages'] = int(max_p) if max_p else 500
+    
+    return config
+
+
+def check_existing_data(name):
+    """Check if scraped data already exists"""
+    data_dir = f"output/{name}_data"
+    if os.path.exists(data_dir) and os.path.exists(f"{data_dir}/summary.json"):
+        with open(f"{data_dir}/summary.json", 'r') as f:
+            summary = json.load(f)
+        return True, summary.get('total_pages', 0)
+    return False, 0
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Convert documentation websites to Claude skills',
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    
+    parser.add_argument('--interactive', '-i', action='store_true',
+                       help='Interactive configuration mode')
+    parser.add_argument('--config', '-c', type=str,
+                       help='Load configuration from file (e.g., configs/godot.json)')
+    parser.add_argument('--name', type=str,
+                       help='Skill name')
+    parser.add_argument('--url', type=str,
+                       help='Base documentation URL')
+    parser.add_argument('--description', '-d', type=str,
+                       help='Skill description')
+    parser.add_argument('--skip-scrape', action='store_true',
+                       help='Skip scraping, use existing data')
+    parser.add_argument('--dry-run', action='store_true',
+                       help='Preview what will be scraped without actually scraping')
+    parser.add_argument('--enhance', action='store_true',
+                       help='Enhance SKILL.md using Claude API after building (requires API key)')
+    parser.add_argument('--enhance-local', action='store_true',
+                       help='Enhance SKILL.md using Claude Code in new terminal (no API key needed)')
+    parser.add_argument('--api-key', type=str,
+                       help='Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)')
+
+    args = parser.parse_args()
+    
+    # Get configuration
+    if args.config:
+        config = load_config(args.config)
+    elif args.interactive or not (args.name and args.url):
+        config = interactive_config()
+    else:
+        config = {
+            'name': args.name,
+            'description': args.description or f'Comprehensive assistance with {args.name}',
+            'base_url': args.url,
+            'selectors': {
+                'main_content': "div[role='main']",
+                'title': 'title',
+                'code_blocks': 'pre code'
+            },
+            'url_patterns': {'include': [], 'exclude': []},
+            'rate_limit': 0.5,
+            'max_pages': 500
+        }
+    
+    # Dry run mode - preview only
+    if args.dry_run:
+        print(f"\n{'='*60}")
+        print("DRY RUN MODE")
+        print(f"{'='*60}")
+        print("This will show what would be scraped without saving anything.\n")
+
+        converter = DocToSkillConverter(config, dry_run=True)
+        converter.scrape_all()
+
+        print(f"\n📋 Configuration Summary:")
+        print(f"   Name: {config['name']}")
+        print(f"   Base URL: {config['base_url']}")
+        print(f"   Max pages: {config.get('max_pages', 500)}")
+        print(f"   Rate limit: {config.get('rate_limit', 0.5)}s")
+        print(f"   Categories: {len(config.get('categories', {}))}")
+        return
+
+    # Check for existing data
+    exists, page_count = check_existing_data(config['name'])
+
+    if exists and not args.skip_scrape:
+        print(f"\n✓ Found existing data: {page_count} pages")
+        response = input("Use existing data? (y/n): ").strip().lower()
+        if response == 'y':
+            args.skip_scrape = True
+
+    # Create converter
+    converter = DocToSkillConverter(config)
+
+    # Scrape or skip
+    if not args.skip_scrape:
+        try:
+            converter.scrape_all()
+        except KeyboardInterrupt:
+            print("\n\nScraping interrupted.")
+            response = input("Continue with skill building? (y/n): ").strip().lower()
+            if response != 'y':
+                return
+    else:
+        print(f"\n⏭️  Skipping scrape, using existing data")
+
+    # Build skill
+    success = converter.build_skill()
+
+    if not success:
+        sys.exit(1)
+
+    # Optional enhancement with Claude API
+    if args.enhance:
+        print(f"\n{'='*60}")
+        print(f"ENHANCING SKILL.MD WITH CLAUDE API")
+        print(f"{'='*60}\n")
+
+        try:
+            import subprocess
+            enhance_cmd = ['python3', 'enhance_skill.py', f'output/{config["name"]}/']
+            if args.api_key:
+                enhance_cmd.extend(['--api-key', args.api_key])
+
+            result = subprocess.run(enhance_cmd, check=True)
+            if result.returncode == 0:
+                print("\n✅ Enhancement complete!")
+        except subprocess.CalledProcessError:
+            print("\n⚠ Enhancement failed, but skill was still built")
+        except FileNotFoundError:
+            print("\n⚠ enhance_skill.py not found. Run manually:")
+            print(f"  python3 enhance_skill.py output/{config['name']}/")
+
+    # Optional enhancement with Claude Code (local, no API key)
+    if args.enhance_local:
+        print(f"\n{'='*60}")
+        print(f"ENHANCING SKILL.MD WITH CLAUDE CODE (LOCAL)")
+        print(f"{'='*60}\n")
+
+        try:
+            import subprocess
+            enhance_cmd = ['python3', 'enhance_skill_local.py', f'output/{config["name"]}/']
+            subprocess.run(enhance_cmd, check=True)
+        except subprocess.CalledProcessError:
+            print("\n⚠ Enhancement failed, but skill was still built")
+        except FileNotFoundError:
+            print("\n⚠ enhance_skill_local.py not found. Run manually:")
+            print(f"  python3 enhance_skill_local.py output/{config['name']}/")
+
+    print(f"\n📦 Package your skill:")
+    print(f"  python3 package_skill.py output/{config['name']}/")
+
+    if not args.enhance and not args.enhance_local:
+        print(f"\n💡 Optional: Enhance SKILL.md with Claude:")
+        print(f"  API-based:  python3 enhance_skill.py output/{config['name']}/")
+        print(f"              or re-run with: --enhance")
+        print(f"  Local (no API key): python3 enhance_skill_local.py output/{config['name']}/")
+        print(f"                      or re-run with: --enhance-local")
+
+
+if __name__ == "__main__":
+    main()
--- a/cli/enhance_skill.py
+++ b/cli/enhance_skill.py
@@ -0,0 +1,292 @@
+#!/usr/bin/env python3
+"""
+SKILL.md Enhancement Script
+Uses Claude API to improve SKILL.md by analyzing reference documentation.
+
+Usage:
+    python3 enhance_skill.py output/steam-inventory/
+    python3 enhance_skill.py output/react/
+    python3 enhance_skill.py output/godot/ --api-key YOUR_API_KEY
+"""
+
+import os
+import sys
+import json
+import argparse
+from pathlib import Path
+
+try:
+    import anthropic
+except ImportError:
+    print("❌ Error: anthropic package not installed")
+    print("Install with: pip3 install anthropic")
+    sys.exit(1)
+
+
+class SkillEnhancer:
+    def __init__(self, skill_dir, api_key=None):
+        self.skill_dir = Path(skill_dir)
+        self.references_dir = self.skill_dir / "references"
+        self.skill_md_path = self.skill_dir / "SKILL.md"
+
+        # Get API key
+        self.api_key = api_key or os.environ.get('ANTHROPIC_API_KEY')
+        if not self.api_key:
+            raise ValueError(
+                "No API key provided. Set ANTHROPIC_API_KEY environment variable "
+                "or use --api-key argument"
+            )
+
+        self.client = anthropic.Anthropic(api_key=self.api_key)
+
+    def read_reference_files(self, max_chars=100000):
+        """Read reference files with size limit"""
+        references = {}
+
+        if not self.references_dir.exists():
+            print(f"⚠ No references directory found at {self.references_dir}")
+            return references
+
+        total_chars = 0
+        for ref_file in sorted(self.references_dir.glob("*.md")):
+            if ref_file.name == "index.md":
+                continue
+
+            content = ref_file.read_text(encoding='utf-8')
+
+            # Limit size per file
+            if len(content) > 40000:
+                content = content[:40000] + "\n\n[Content truncated...]"
+
+            references[ref_file.name] = content
+            total_chars += len(content)
+
+            # Stop if we've read enough
+            if total_chars > max_chars:
+                print(f"  ℹ Limiting input to {max_chars:,} characters")
+                break
+
+        return references
+
+    def read_current_skill_md(self):
+        """Read existing SKILL.md"""
+        if not self.skill_md_path.exists():
+            return None
+        return self.skill_md_path.read_text(encoding='utf-8')
+
+    def enhance_skill_md(self, references, current_skill_md):
+        """Use Claude to enhance SKILL.md"""
+
+        # Build prompt
+        prompt = self._build_enhancement_prompt(references, current_skill_md)
+
+        print("\n🤖 Asking Claude to enhance SKILL.md...")
+        print(f"   Input: {len(prompt):,} characters")
+
+        try:
+            message = self.client.messages.create(
+                model="claude-sonnet-4-20250514",
+                max_tokens=4096,
+                temperature=0.3,
+                messages=[{
+                    "role": "user",
+                    "content": prompt
+                }]
+            )
+
+            enhanced_content = message.content[0].text
+            return enhanced_content
+
+        except Exception as e:
+            print(f"❌ Error calling Claude API: {e}")
+            return None
+
+    def _build_enhancement_prompt(self, references, current_skill_md):
+        """Build the prompt for Claude"""
+
+        # Extract skill name and description
+        skill_name = self.skill_dir.name
+
+        prompt = f"""You are enhancing a Claude skill's SKILL.md file. This skill is about: {skill_name}
+
+I've scraped documentation and organized it into reference files. Your job is to create an EXCELLENT SKILL.md that will help Claude use this documentation effectively.
+
+CURRENT SKILL.MD:
+{'```markdown' if current_skill_md else '(none - create from scratch)'}
+{current_skill_md or 'No existing SKILL.md'}
+{'```' if current_skill_md else ''}
+
+REFERENCE DOCUMENTATION:
+"""
+
+        for filename, content in references.items():
+            prompt += f"\n\n## {filename}\n```markdown\n{content[:30000]}\n```\n"
+
+        prompt += """
+
+YOUR TASK:
+Create an enhanced SKILL.md that includes:
+
+1. **Clear "When to Use This Skill" section** - Be specific about trigger conditions
+2. **Excellent Quick Reference section** - Extract 5-10 of the BEST, most practical code examples from the reference docs
+   - Choose SHORT, clear examples that demonstrate common tasks
+   - Include both simple and intermediate examples
+   - Annotate examples with clear descriptions
+   - Use proper language tags (cpp, python, javascript, json, etc.)
+3. **Detailed Reference Files description** - Explain what's in each reference file
+4. **Practical "Working with This Skill" section** - Give users clear guidance on how to navigate the skill
+5. **Key Concepts section** (if applicable) - Explain core concepts
+6. **Keep the frontmatter** (---\nname: ...\n---) intact
+
+IMPORTANT:
+- Extract REAL examples from the reference docs, don't make them up
+- Prioritize SHORT, clear examples (5-20 lines max)
+- Make it actionable and practical
+- Don't be too verbose - be concise but useful
+- Maintain the markdown structure for Claude skills
+- Keep code examples properly formatted with language tags
+
+OUTPUT:
+Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
+"""
+
+        return prompt
+
+    def save_enhanced_skill_md(self, content):
+        """Save the enhanced SKILL.md"""
+        # Backup original
+        if self.skill_md_path.exists():
+            backup_path = self.skill_md_path.with_suffix('.md.backup')
+            self.skill_md_path.rename(backup_path)
+            print(f"  💾 Backed up original to: {backup_path.name}")
+
+        # Save enhanced version
+        self.skill_md_path.write_text(content, encoding='utf-8')
+        print(f"  ✅ Saved enhanced SKILL.md")
+
+    def run(self):
+        """Main enhancement workflow"""
+        print(f"\n{'='*60}")
+        print(f"ENHANCING SKILL: {self.skill_dir.name}")
+        print(f"{'='*60}\n")
+
+        # Read reference files
+        print("📖 Reading reference documentation...")
+        references = self.read_reference_files()
+
+        if not references:
+            print("❌ No reference files found to analyze")
+            return False
+
+        print(f"  ✓ Read {len(references)} reference files")
+        total_size = sum(len(c) for c in references.values())
+        print(f"  ✓ Total size: {total_size:,} characters\n")
+
+        # Read current SKILL.md
+        current_skill_md = self.read_current_skill_md()
+        if current_skill_md:
+            print(f"  ℹ Found existing SKILL.md ({len(current_skill_md)} chars)")
+        else:
+            print(f"  ℹ No existing SKILL.md, will create new one")
+
+        # Enhance with Claude
+        enhanced = self.enhance_skill_md(references, current_skill_md)
+
+        if not enhanced:
+            print("❌ Enhancement failed")
+            return False
+
+        print(f"  ✓ Generated enhanced SKILL.md ({len(enhanced)} chars)\n")
+
+        # Save
+        print("💾 Saving enhanced SKILL.md...")
+        self.save_enhanced_skill_md(enhanced)
+
+        print(f"\n✅ Enhancement complete!")
+        print(f"\nNext steps:")
+        print(f"  1. Review: {self.skill_md_path}")
+        print(f"  2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}")
+        print(f"  3. Package your skill:")
+        print(f"     python3 /mnt/skills/examples/skill-creator/scripts/package_skill.py {self.skill_dir}/")
+
+        return True
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Enhance SKILL.md using Claude API',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Using ANTHROPIC_API_KEY environment variable
+  export ANTHROPIC_API_KEY=sk-ant-...
+  python3 enhance_skill.py output/steam-inventory/
+
+  # Providing API key directly
+  python3 enhance_skill.py output/react/ --api-key sk-ant-...
+
+  # Show what would be done (dry run)
+  python3 enhance_skill.py output/godot/ --dry-run
+"""
+    )
+
+    parser.add_argument('skill_dir', type=str,
+                       help='Path to skill directory (e.g., output/steam-inventory/)')
+    parser.add_argument('--api-key', type=str,
+                       help='Anthropic API key (or set ANTHROPIC_API_KEY env var)')
+    parser.add_argument('--dry-run', action='store_true',
+                       help='Show what would be done without calling API')
+
+    args = parser.parse_args()
+
+    # Validate skill directory
+    skill_dir = Path(args.skill_dir)
+    if not skill_dir.exists():
+        print(f"❌ Error: Directory not found: {skill_dir}")
+        sys.exit(1)
+
+    if not skill_dir.is_dir():
+        print(f"❌ Error: Not a directory: {skill_dir}")
+        sys.exit(1)
+
+    # Dry run mode
+    if args.dry_run:
+        print(f"🔍 DRY RUN MODE")
+        print(f"   Would enhance: {skill_dir}")
+        print(f"   References: {skill_dir / 'references'}")
+        print(f"   SKILL.md: {skill_dir / 'SKILL.md'}")
+
+        refs_dir = skill_dir / "references"
+        if refs_dir.exists():
+            ref_files = list(refs_dir.glob("*.md"))
+            print(f"   Found {len(ref_files)} reference files:")
+            for rf in ref_files:
+                size = rf.stat().st_size
+                print(f"     - {rf.name} ({size:,} bytes)")
+
+        print("\nTo actually run enhancement:")
+        print(f"  python3 enhance_skill.py {skill_dir}")
+        return
+
+    # Create enhancer and run
+    try:
+        enhancer = SkillEnhancer(skill_dir, api_key=args.api_key)
+        success = enhancer.run()
+        sys.exit(0 if success else 1)
+
+    except ValueError as e:
+        print(f"❌ Error: {e}")
+        print("\nSet your API key:")
+        print("  export ANTHROPIC_API_KEY=sk-ant-...")
+        print("Or provide it directly:")
+        print(f"  python3 enhance_skill.py {skill_dir} --api-key sk-ant-...")
+        sys.exit(1)
+    except Exception as e:
+        print(f"❌ Unexpected error: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/cli/enhance_skill_local.py
+++ b/cli/enhance_skill_local.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+"""
+SKILL.md Enhancement Script (Local - Using Claude Code)
+Opens a new terminal with Claude Code to enhance SKILL.md, then reports back.
+No API key needed - uses your existing Claude Code Max plan!
+
+Usage:
+    python3 enhance_skill_local.py output/steam-inventory/
+    python3 enhance_skill_local.py output/react/
+"""
+
+import os
+import sys
+import time
+import subprocess
+import tempfile
+from pathlib import Path
+
+
+class LocalSkillEnhancer:
+    def __init__(self, skill_dir):
+        self.skill_dir = Path(skill_dir)
+        self.references_dir = self.skill_dir / "references"
+        self.skill_md_path = self.skill_dir / "SKILL.md"
+
+    def create_enhancement_prompt(self):
+        """Create the prompt file for Claude Code"""
+
+        # Read reference files
+        references = self.read_reference_files()
+
+        if not references:
+            print("❌ No reference files found")
+            return None
+
+        # Read current SKILL.md
+        current_skill_md = ""
+        if self.skill_md_path.exists():
+            current_skill_md = self.skill_md_path.read_text(encoding='utf-8')
+
+        # Build prompt
+        prompt = f"""I need you to enhance the SKILL.md file for the {self.skill_dir.name} skill.
+
+CURRENT SKILL.MD:
+{'-'*60}
+{current_skill_md if current_skill_md else '(No existing SKILL.md - create from scratch)'}
+{'-'*60}
+
+REFERENCE DOCUMENTATION:
+{'-'*60}
+"""
+
+        for filename, content in references.items():
+            prompt += f"\n## {filename}\n{content[:15000]}\n"
+
+        prompt += f"""
+{'-'*60}
+
+YOUR TASK:
+Create an EXCELLENT SKILL.md file that will help Claude use this documentation effectively.
+
+Requirements:
+1. **Clear "When to Use This Skill" section**
+   - Be SPECIFIC about trigger conditions
+   - List concrete use cases
+
+2. **Excellent Quick Reference section**
+   - Extract 5-10 of the BEST, most practical code examples from the reference docs
+   - Choose SHORT, clear examples (5-20 lines max)
+   - Include both simple and intermediate examples
+   - Use proper language tags (cpp, python, javascript, json, etc.)
+   - Add clear descriptions for each example
+
+3. **Detailed Reference Files description**
+   - Explain what's in each reference file
+   - Help users navigate the documentation
+
+4. **Practical "Working with This Skill" section**
+   - Clear guidance for beginners, intermediate, and advanced users
+   - Navigation tips
+
+5. **Key Concepts section** (if applicable)
+   - Explain core concepts
+   - Define important terminology
+
+IMPORTANT:
+- Extract REAL examples from the reference docs above
+- Prioritize SHORT, clear examples
+- Make it actionable and practical
+- Keep the frontmatter (---\\nname: ...\\n---) intact
+- Use proper markdown formatting
+
+SAVE THE RESULT:
+Save the complete enhanced SKILL.md to: {self.skill_md_path.absolute()}
+
+First, backup the original to: {self.skill_md_path.with_suffix('.md.backup').absolute()}
+"""
+
+        return prompt
+
+    def read_reference_files(self, max_chars=50000):
+        """Read reference files with size limit"""
+        references = {}
+
+        if not self.references_dir.exists():
+            return references
+
+        total_chars = 0
+        for ref_file in sorted(self.references_dir.glob("*.md")):
+            if ref_file.name == "index.md":
+                continue
+
+            content = ref_file.read_text(encoding='utf-8')
+
+            # Limit size per file
+            if len(content) > 20000:
+                content = content[:20000] + "\n\n[Content truncated...]"
+
+            references[ref_file.name] = content
+            total_chars += len(content)
+
+            if total_chars > max_chars:
+                break
+
+        return references
+
+    def run(self):
+        """Main enhancement workflow"""
+        print(f"\n{'='*60}")
+        print(f"LOCAL ENHANCEMENT: {self.skill_dir.name}")
+        print(f"{'='*60}\n")
+
+        # Validate
+        if not self.skill_dir.exists():
+            print(f"❌ Directory not found: {self.skill_dir}")
+            return False
+
+        # Read reference files
+        print("📖 Reading reference documentation...")
+        references = self.read_reference_files()
+
+        if not references:
+            print("❌ No reference files found to analyze")
+            return False
+
+        print(f"  ✓ Read {len(references)} reference files")
+        total_size = sum(len(c) for c in references.values())
+        print(f"  ✓ Total size: {total_size:,} characters\n")
+
+        # Create prompt
+        print("📝 Creating enhancement prompt...")
+        prompt = self.create_enhancement_prompt()
+
+        if not prompt:
+            return False
+
+        # Save prompt to temp file
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False, encoding='utf-8') as f:
+            prompt_file = f.name
+            f.write(prompt)
+
+        print(f"  ✓ Prompt saved ({len(prompt):,} characters)\n")
+
+        # Launch Claude Code in new terminal
+        print("🚀 Launching Claude Code in new terminal...")
+        print("   This will:")
+        print("   1. Open a new terminal window")
+        print("   2. Run Claude Code with the enhancement task")
+        print("   3. Claude will read the docs and enhance SKILL.md")
+        print("   4. Terminal will auto-close when done")
+        print()
+
+        # Create a shell script to run in the terminal
+        shell_script = f'''#!/bin/bash
+claude {prompt_file}
+echo ""
+echo "✅ Enhancement complete!"
+echo "Press any key to close..."
+read -n 1
+rm {prompt_file}
+'''
+
+        # Save shell script
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f:
+            script_file = f.name
+            f.write(shell_script)
+
+        os.chmod(script_file, 0o755)
+
+        # Launch in new terminal (macOS specific)
+        if sys.platform == 'darwin':
+            # macOS Terminal - simple approach
+            try:
+                subprocess.Popen(['open', '-a', 'Terminal', script_file])
+            except Exception as e:
+                print(f"⚠️  Error launching terminal: {e}")
+                print(f"\nManually run: {script_file}")
+                return False
+        else:
+            print("⚠️  Auto-launch only works on macOS")
+            print(f"\nManually run this command in a new terminal:")
+            print(f"  claude '{prompt_file}'")
+            print(f"\nThen delete the prompt file:")
+            print(f"  rm '{prompt_file}'")
+            return False
+
+        print("✅ New terminal launched with Claude Code!")
+        print()
+        print("📊 Status:")
+        print(f"  - Prompt file: {prompt_file}")
+        print(f"  - Skill directory: {self.skill_dir.absolute()}")
+        print(f"  - SKILL.md will be saved to: {self.skill_md_path.absolute()}")
+        print(f"  - Original backed up to: {self.skill_md_path.with_suffix('.md.backup').absolute()}")
+        print()
+        print("⏳ Wait for Claude Code to finish in the other terminal...")
+        print("   (Usually takes 30-60 seconds)")
+        print()
+        print("💡 When done:")
+        print(f"  1. Check the enhanced SKILL.md: {self.skill_md_path}")
+        print(f"  2. If you don't like it, restore: mv {self.skill_md_path.with_suffix('.md.backup')} {self.skill_md_path}")
+        print(f"  3. Package: python3 /mnt/skills/examples/skill-creator/scripts/package_skill.py {self.skill_dir}/")
+
+        return True
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python3 enhance_skill_local.py <skill_directory>")
+        print()
+        print("Examples:")
+        print("  python3 enhance_skill_local.py output/steam-inventory/")
+        print("  python3 enhance_skill_local.py output/react/")
+        sys.exit(1)
+
+    skill_dir = sys.argv[1]
+
+    enhancer = LocalSkillEnhancer(skill_dir)
+    success = enhancer.run()
+
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
--- a/cli/estimate_pages.py
+++ b/cli/estimate_pages.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+Page Count Estimator for Skill Seeker
+Quickly estimates how many pages a config will scrape without downloading content
+"""
+
+import sys
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin, urlparse
+import time
+import json
+
+
+def estimate_pages(config, max_discovery=1000, timeout=30):
+    """
+    Estimate total pages that will be scraped
+
+    Args:
+        config: Configuration dictionary
+        max_discovery: Maximum pages to discover (safety limit)
+        timeout: Timeout for HTTP requests in seconds
+
+    Returns:
+        dict with estimation results
+    """
+    base_url = config['base_url']
+    start_urls = config.get('start_urls', [base_url])
+    url_patterns = config.get('url_patterns', {'include': [], 'exclude': []})
+    rate_limit = config.get('rate_limit', 0.5)
+
+    visited = set()
+    pending = list(start_urls)
+    discovered = 0
+
+    include_patterns = url_patterns.get('include', [])
+    exclude_patterns = url_patterns.get('exclude', [])
+
+    print(f"🔍 Estimating pages for: {config['name']}")
+    print(f"📍 Base URL: {base_url}")
+    print(f"🎯 Start URLs: {len(start_urls)}")
+    print(f"⏱️  Rate limit: {rate_limit}s")
+    print(f"🔢 Max discovery: {max_discovery}")
+    print()
+
+    start_time = time.time()
+
+    while pending and discovered < max_discovery:
+        url = pending.pop(0)
+
+        # Skip if already visited
+        if url in visited:
+            continue
+
+        visited.add(url)
+        discovered += 1
+
+        # Progress indicator
+        if discovered % 10 == 0:
+            elapsed = time.time() - start_time
+            rate = discovered / elapsed if elapsed > 0 else 0
+            print(f"⏳ Discovered: {discovered} pages ({rate:.1f} pages/sec)", end='\r')
+
+        try:
+            # HEAD request first to check if page exists (faster)
+            head_response = requests.head(url, timeout=timeout, allow_redirects=True)
+
+            # Skip non-HTML content
+            content_type = head_response.headers.get('Content-Type', '')
+            if 'text/html' not in content_type:
+                continue
+
+            # Now GET the page to find links
+            response = requests.get(url, timeout=timeout)
+            response.raise_for_status()
+
+            soup = BeautifulSoup(response.content, 'html.parser')
+
+            # Find all links
+            for link in soup.find_all('a', href=True):
+                href = link['href']
+                full_url = urljoin(url, href)
+
+                # Normalize URL
+                parsed = urlparse(full_url)
+                full_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
+
+                # Check if URL is valid
+                if not is_valid_url(full_url, base_url, include_patterns, exclude_patterns):
+                    continue
+
+                # Add to pending if not visited
+                if full_url not in visited and full_url not in pending:
+                    pending.append(full_url)
+
+            # Rate limiting
+            time.sleep(rate_limit)
+
+        except requests.RequestException as e:
+            # Silently skip errors during estimation
+            pass
+        except Exception as e:
+            # Silently skip other errors
+            pass
+
+    elapsed = time.time() - start_time
+
+    # Results
+    results = {
+        'discovered': discovered,
+        'pending': len(pending),
+        'estimated_total': discovered + len(pending),
+        'elapsed_seconds': round(elapsed, 2),
+        'discovery_rate': round(discovered / elapsed if elapsed > 0 else 0, 2),
+        'hit_limit': discovered >= max_discovery
+    }
+
+    return results
+
+
+def is_valid_url(url, base_url, include_patterns, exclude_patterns):
+    """Check if URL should be crawled"""
+    # Must be same domain
+    if not url.startswith(base_url.rstrip('/')):
+        return False
+
+    # Check exclude patterns first
+    if exclude_patterns:
+        for pattern in exclude_patterns:
+            if pattern in url:
+                return False
+
+    # Check include patterns (if specified)
+    if include_patterns:
+        for pattern in include_patterns:
+            if pattern in url:
+                return True
+        return False
+
+    # If no include patterns, accept by default
+    return True
+
+
+def print_results(results, config):
+    """Print estimation results"""
+    print()
+    print("=" * 70)
+    print("📊 ESTIMATION RESULTS")
+    print("=" * 70)
+    print()
+    print(f"Config: {config['name']}")
+    print(f"Base URL: {config['base_url']}")
+    print()
+    print(f"✅ Pages Discovered: {results['discovered']}")
+    print(f"⏳ Pages Pending: {results['pending']}")
+    print(f"📈 Estimated Total: {results['estimated_total']}")
+    print()
+    print(f"⏱️  Time Elapsed: {results['elapsed_seconds']}s")
+    print(f"⚡ Discovery Rate: {results['discovery_rate']} pages/sec")
+
+    if results['hit_limit']:
+        print()
+        print("⚠️  Hit discovery limit - actual total may be higher")
+        print("   Increase max_discovery parameter for more accurate estimate")
+
+    print()
+    print("=" * 70)
+    print("💡 RECOMMENDATIONS")
+    print("=" * 70)
+    print()
+
+    estimated = results['estimated_total']
+    current_max = config.get('max_pages', 100)
+
+    if estimated <= current_max:
+        print(f"✅ Current max_pages ({current_max}) is sufficient")
+    else:
+        recommended = min(estimated + 50, 10000)  # Add 50 buffer, cap at 10k
+        print(f"⚠️  Current max_pages ({current_max}) may be too low")
+        print(f"📝 Recommended max_pages: {recommended}")
+        print(f"   (Estimated {estimated} + 50 buffer)")
+
+    # Estimate time for full scrape
+    rate_limit = config.get('rate_limit', 0.5)
+    estimated_time = (estimated * rate_limit) / 60  # in minutes
+
+    print()
+    print(f"⏱️  Estimated full scrape time: {estimated_time:.1f} minutes")
+    print(f"   (Based on rate_limit: {rate_limit}s)")
+
+    print()
+
+
+def load_config(config_path):
+    """Load configuration from JSON file"""
+    try:
+        with open(config_path, 'r') as f:
+            config = json.load(f)
+        return config
+    except FileNotFoundError:
+        print(f"❌ Error: Config file not found: {config_path}")
+        sys.exit(1)
+    except json.JSONDecodeError as e:
+        print(f"❌ Error: Invalid JSON in config file: {e}")
+        sys.exit(1)
+
+
+def main():
+    """Main entry point"""
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description='Estimate page count for Skill Seeker configs',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Estimate pages for a config
+  python3 estimate_pages.py configs/react.json
+
+  # Estimate with higher discovery limit
+  python3 estimate_pages.py configs/godot.json --max-discovery 2000
+
+  # Quick estimate (stop at 100 pages)
+  python3 estimate_pages.py configs/vue.json --max-discovery 100
+        """
+    )
+
+    parser.add_argument('config', help='Path to config JSON file')
+    parser.add_argument('--max-discovery', '-m', type=int, default=1000,
+                       help='Maximum pages to discover (default: 1000)')
+    parser.add_argument('--timeout', '-t', type=int, default=30,
+                       help='HTTP request timeout in seconds (default: 30)')
+
+    args = parser.parse_args()
+
+    # Load config
+    config = load_config(args.config)
+
+    # Run estimation
+    try:
+        results = estimate_pages(config, args.max_discovery, args.timeout)
+        print_results(results, config)
+
+        # Return exit code based on results
+        if results['hit_limit']:
+            return 2  # Warning: hit limit
+        return 0  # Success
+
+    except KeyboardInterrupt:
+        print("\n\n⚠️  Estimation interrupted by user")
+        return 1
+    except Exception as e:
+        print(f"\n\n❌ Error during estimation: {e}")
+        return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/cli/package_skill.py
+++ b/cli/package_skill.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+Simple Skill Packager
+Packages a skill directory into a .zip file for Claude.
+
+Usage:
+    python3 package_skill.py output/steam-inventory/
+    python3 package_skill.py output/react/
+"""
+
+import os
+import sys
+import zipfile
+from pathlib import Path
+
+
+def package_skill(skill_dir):
+    """Package a skill directory into a .zip file next to it.
+
+    The archive is named after the directory and written to the directory's
+    parent. Files ending in ``.backup`` are left out.
+
+    Args:
+        skill_dir: Path to the skill directory; it must contain ``SKILL.md``.
+
+    Returns:
+        bool: True when the archive was written, False when ``skill_dir`` is
+        missing, is not a directory, or has no ``SKILL.md``.
+    """
+    skill_path = Path(skill_dir)
+
+    if not skill_path.exists():
+        print(f"❌ Error: Directory not found: {skill_dir}")
+        return False
+
+    if not skill_path.is_dir():
+        print(f"❌ Error: Not a directory: {skill_dir}")
+        return False
+
+    # Verify SKILL.md exists
+    skill_md = skill_path / "SKILL.md"
+    if not skill_md.exists():
+        print(f"❌ Error: SKILL.md not found in {skill_dir}")
+        return False
+
+    # Path('.').name is '' and Path('..').name is '..'. Without resolving,
+    # the archive would be called ".zip" / "...zip" and, for '.', would be
+    # created inside the very tree that is being zipped.
+    if skill_path.name in ('', '..'):
+        skill_path = skill_path.resolve()
+
+    # Create zip filename
+    skill_name = skill_path.name
+    zip_path = skill_path.parent / f"{skill_name}.zip"
+
+    print(f"📦 Packaging skill: {skill_name}")
+    print(f"   Source: {skill_path}")
+    print(f"   Output: {zip_path}")
+
+    # Create zip file
+    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
+        for root, dirs, files in os.walk(skill_path):
+            # Sorting (dirs in place, so os.walk honors it) makes the entry
+            # order independent of the filesystem's listing order.
+            dirs.sort()
+            for file in sorted(files):
+                # Skip backup files
+                if file.endswith('.backup'):
+                    continue
+
+                file_path = Path(root) / file
+                arcname = file_path.relative_to(skill_path)
+                zf.write(file_path, arcname)
+                print(f"   + {arcname}")
+
+    # Get zip size
+    zip_size = zip_path.stat().st_size
+    print(f"\n✅ Package created: {zip_path}")
+    print(f"   Size: {zip_size:,} bytes ({zip_size / 1024:.1f} KB)")
+
+    return True
+
+
+def main():
+    """Package the skill directory named by the first command-line argument.
+
+    Prints usage and exits with status 1 when no argument is given;
+    otherwise exits 0 if packaging succeeded and 1 if it did not.
+    """
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        usage_lines = (
+            "Usage: python3 package_skill.py <skill_directory>",
+            "",
+            "Examples:",
+            "  python3 package_skill.py output/steam-inventory/",
+            "  python3 package_skill.py output/react/",
+        )
+        for line in usage_lines:
+            print(line)
+        sys.exit(1)
+
+    # Only the first argument is used; any extras are ignored.
+    packaged = package_skill(cli_args[0])
+    sys.exit(0 if packaged else 1)
+
+
+if __name__ == "__main__":
+    main()
--- a/cli/run_tests.py
+++ b/cli/run_tests.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+"""
+Test Runner for Skill Seeker
+Runs all test suites and generates a comprehensive test report
+"""
+
+import sys
+import unittest
+import os
+from io import StringIO
+from pathlib import Path
+
+
+class ColoredTextTestResult(unittest.TextTestResult):
+    """Text test result that reports each outcome once, in ANSI color.
+
+    Every outcome is also appended to ``test_results`` as a
+    ``(status, test)`` tuple, where status is 'PASS', 'ERROR', 'FAIL' or
+    'SKIP'.
+    """
+
+    # ANSI color codes
+    GREEN = '\033[92m'
+    RED = '\033[91m'
+    YELLOW = '\033[93m'
+    BLUE = '\033[94m'
+    RESET = '\033[0m'
+    BOLD = '\033[1m'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.test_results = []
+
+    def _report(self, status, test, color, label, dot):
+        """Record one outcome and write its colored marker to the stream.
+
+        ``label`` is used in verbose mode (one line per test) and ``dot``
+        in the default one-character-per-test mode; quiet mode writes
+        nothing.
+        """
+        self.test_results.append((status, test))
+        if self.showAll:
+            self.stream.write(f"{color}{label}{self.RESET}\n")
+            self.stream.flush()
+        elif self.dots:
+            self.stream.write(f"{color}{dot}{self.RESET}")
+            self.stream.flush()
+
+    # The add* hooks below call unittest.TestResult directly rather than
+    # super(): TextTestResult's own hooks already write "ok" / "." / "E" /
+    # "F" / "s" to the stream, so going through them printed every outcome
+    # twice (plain, then colored). TestResult still does the bookkeeping
+    # (errors, failures, skipped) and honors failfast.
+
+    def addSuccess(self, test):
+        unittest.TestResult.addSuccess(self, test)
+        self._report('PASS', test, self.GREEN, "✓ PASS", ".")
+
+    def addError(self, test, err):
+        unittest.TestResult.addError(self, test, err)
+        self._report('ERROR', test, self.RED, "✗ ERROR", "E")
+
+    def addFailure(self, test, err):
+        unittest.TestResult.addFailure(self, test, err)
+        self._report('FAIL', test, self.RED, "✗ FAIL", "F")
+
+    def addSkip(self, test, reason):
+        unittest.TestResult.addSkip(self, test, reason)
+        self._report('SKIP', test, self.YELLOW, "⊘ SKIP", "s")
+
+
+class ColoredTextTestRunner(unittest.TextTestRunner):
+    """Text test runner that reports outcomes through ColoredTextTestResult."""
+    # TextTestRunner builds its result object from ``resultclass``.
+    resultclass = ColoredTextTestResult
+
+
+def discover_tests(test_dir='tests'):
+    """Collect every test from the ``test_*.py`` files found under ``test_dir``.
+
+    Args:
+        test_dir: Directory where discovery starts (default: 'tests').
+
+    Returns:
+        The suite built by ``unittest.TestLoader.discover``.
+    """
+    return unittest.TestLoader().discover(test_dir, pattern='test_*.py')
+
+
+def run_specific_suite(suite_name):
+    """Load the test module registered under a short suite name.
+
+    Args:
+        suite_name: One of 'config', 'features' or 'integration'.
+
+    Returns:
+        The loaded test suite, or None (after printing a message) when the
+        name is unknown or loading raises.
+    """
+    known_suites = {
+        'config': 'tests.test_config_validation',
+        'features': 'tests.test_scraper_features',
+        'integration': 'tests.test_integration'
+    }
+
+    target_module = known_suites.get(suite_name)
+    if target_module is None:
+        print(f"Unknown test suite: {suite_name}")
+        print(f"Available suites: {', '.join(known_suites)}")
+        return None
+
+    test_loader = unittest.TestLoader()
+    try:
+        return test_loader.loadTestsFromName(target_module)
+    except Exception as exc:
+        print(f"Error loading test suite '{suite_name}': {exc}")
+        return None
+
+
+def print_summary(result):
+    """Print totals, success rate and a per-class breakdown for a test run.
+
+    Args:
+        result: Finished test result exposing ``testsRun``, ``failures``,
+            ``errors`` and ``skipped``. When it also has ``test_results``
+            (a list of ``(status, test)`` tuples), a breakdown grouped by
+            test class is printed as well.
+
+    Returns:
+        bool: True when there were no failures and no errors.
+    """
+    palette = ColoredTextTestResult
+
+    total = result.testsRun
+    failed = len(result.failures)
+    errors = len(result.errors)
+    skipped = len(result.skipped)
+    passed = total - failed - errors - skipped
+
+    print("\n" + "="*70)
+    print("TEST SUMMARY")
+    print("="*70)
+
+    # Overall stats
+    print(f"\n{palette.BOLD}Total Tests:{palette.RESET} {total}")
+    print(f"{palette.GREEN}✓ Passed:{palette.RESET} {passed}")
+    if failed > 0:
+        print(f"{palette.RED}✗ Failed:{palette.RESET} {failed}")
+    if errors > 0:
+        print(f"{palette.RED}✗ Errors:{palette.RESET} {errors}")
+    if skipped > 0:
+        print(f"{palette.YELLOW}⊘ Skipped:{palette.RESET} {skipped}")
+
+    # Success rate (skipped when nothing ran, to avoid dividing by zero)
+    if total > 0:
+        success_rate = (passed / total) * 100
+        if success_rate == 100:
+            color = palette.GREEN
+        elif success_rate >= 80:
+            color = palette.YELLOW
+        else:
+            color = palette.RED
+        print(f"\n{color}Success Rate: {success_rate:.1f}%{palette.RESET}")
+
+    # Category breakdown
+    if hasattr(result, 'test_results'):
+        print(f"\n{palette.BOLD}Test Breakdown by Category:{palette.RESET}")
+
+        categories = {}
+        for status, test in result.test_results:
+            # Group by the test's class. Parsing str(test) does not work
+            # for this: it reads "method (module.Class...)", so cutting at
+            # the first '.' yields "(module" instead of the class name.
+            class_name = type(test).__name__
+            stats = categories.setdefault(
+                class_name, {'PASS': 0, 'FAIL': 0, 'ERROR': 0, 'SKIP': 0}
+            )
+            stats[status] += 1
+
+        for category, stats in sorted(categories.items()):
+            total_cat = sum(stats.values())
+            passed_cat = stats['PASS']
+            print(f"  {category}: {passed_cat}/{total_cat} passed")
+
+    print("\n" + "="*70)
+
+    # Return status
+    return failed == 0 and errors == 0
+
+
+def _iter_tests(suite):
+    """Yield every individual test in ``suite``, however deeply it is nested."""
+    for item in suite:
+        if isinstance(item, unittest.TestSuite):
+            yield from _iter_tests(item)
+        else:
+            yield item
+
+
+def main():
+    """Parse command-line options, then list or run the selected tests.
+
+    Returns:
+        int: 0 when the tests were listed or the run had no failures or
+        errors; 1 when the requested suite could not be loaded or the run
+        had failures or errors.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description='Run tests for Skill Seeker',
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+
+    parser.add_argument('--suite', '-s', type=str,
+                       help='Run specific test suite (config, features, integration)')
+    parser.add_argument('--verbose', '-v', action='store_true',
+                       help='Verbose output (show each test)')
+    parser.add_argument('--quiet', '-q', action='store_true',
+                       help='Quiet output (minimal output)')
+    parser.add_argument('--failfast', '-f', action='store_true',
+                       help='Stop on first failure')
+    parser.add_argument('--list', '-l', action='store_true',
+                       help='List all available tests')
+
+    args = parser.parse_args()
+
+    # Set verbosity (--verbose wins when both flags are given)
+    verbosity = 1
+    if args.verbose:
+        verbosity = 2
+    elif args.quiet:
+        verbosity = 0
+
+    print(f"\n{ColoredTextTestResult.BOLD}{'='*70}{ColoredTextTestResult.RESET}")
+    print(f"{ColoredTextTestResult.BOLD}SKILL SEEKER TEST SUITE{ColoredTextTestResult.RESET}")
+    print(f"{ColoredTextTestResult.BOLD}{'='*70}{ColoredTextTestResult.RESET}\n")
+
+    # Discover or load specific suite
+    if args.suite:
+        print(f"Running test suite: {ColoredTextTestResult.BLUE}{args.suite}{ColoredTextTestResult.RESET}\n")
+        suite = run_specific_suite(args.suite)
+        if suite is None:
+            return 1
+    else:
+        print(f"Running {ColoredTextTestResult.BLUE}all tests{ColoredTextTestResult.RESET}\n")
+        suite = discover_tests()
+
+    # List tests. Suites nest to different depths depending on how they
+    # were loaded, so flatten recursively instead of assuming two levels
+    # (which printed TestSuite reprs for discovered tests).
+    if args.list:
+        print("\nAvailable tests:\n")
+        for test in _iter_tests(suite):
+            print(f"  - {test}")
+        print()
+        return 0
+
+    # Run tests
+    runner = ColoredTextTestRunner(
+        verbosity=verbosity,
+        failfast=args.failfast
+    )
+
+    result = runner.run(suite)
+
+    # Print summary
+    success = print_summary(result)
+
+    # Return appropriate exit code
+    return 0 if success else 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())