feat: Unified create command + consolidated enhancement flags

This commit includes two major improvements and one bug fix: ## 1. Unified Create Command (v3.0.0 feature) - Auto-detects source type (web, GitHub, local, PDF, config) - Three-tier argument organization (universal, source-specific, advanced) - Routes to existing scrapers (100% backward compatible) - Progressive disclosure: 15 universal flags in default help **New files:** - src/skill_seekers/cli/source_detector.py - Auto-detection logic - src/skill_seekers/cli/arguments/create.py - Argument definitions - src/skill_seekers/cli/create_command.py - Main orchestrator - src/skill_seekers/cli/parsers/create_parser.py - Parser integration **Tests:** - tests/test_source_detector.py (35 tests) - tests/test_create_arguments.py (30 tests) - tests/test_create_integration_basic.py (10 tests) ## 2. Enhancement Flag Consolidation (Phase 1) - Consolidated 3 flags (--enhance, --enhance-local, --enhance-level) → 1 flag - --enhance-level 0-3 with auto-detection of API vs LOCAL mode - Default: --enhance-level 2 (balanced enhancement) **Modified files:** - arguments/{common,create,scrape,github,analyze}.py - Added enhance_level - {doc_scraper,github_scraper,config_extractor,main}.py - Updated logic - create_command.py - Uses consolidated flag **Auto-detection:** - If ANTHROPIC_API_KEY set → API mode - Else → LOCAL mode (Claude Code) ## 3. PresetManager Bug Fix - Fixed module naming conflict (presets.py vs presets/ directory) - Moved presets.py → presets/manager.py - Updated __init__.py exports **Test Results:** - All 160+ tests passing - Zero regressions - 100% backward compatible Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-15 14:29:19 +03:00
parent aa952aff81
commit ba1670a220
53 changed files with 10144 additions and 589 deletions
--- a/src/skill_seekers/cli/source_detector.py
+++ b/src/skill_seekers/cli/source_detector.py
@@ -0,0 +1,214 @@
+"""Source type detection for unified create command.
+
+Auto-detects whether a source is a web URL, GitHub repository,
+local directory, PDF file, or config file based on patterns.
+"""
+
+import os
+import re
+from dataclasses import dataclass
+from typing import Dict, Any, Optional
+from urllib.parse import urlparse
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class SourceInfo:
+    """Outcome of classifying one user-supplied source string.
+
+    Attributes:
+        type: Source type ('web', 'github', 'local', 'pdf', 'config')
+        parsed: Payload dict; its single key depends on type ('url', 'repo', ...)
+        suggested_name: Auto-suggested name for the skill
+        raw_input: Classified string; inferred domains include the added 'https://'
+    """
+    type: str
+    parsed: Dict[str, Any]
+    suggested_name: str
+    raw_input: str
+
+
+class SourceDetector:
+    """Detects source type from user input and extracts relevant information.
+
+    Checks run in a fixed order: file extension, existing directory, GitHub
+    repository, explicit http(s) URL, then bare-domain inference.
+    """
+
+    # 'owner/repo' shorthand; _detect_github filters out './x'-style paths.
+    GITHUB_REPO_PATTERN = re.compile(r'^([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)$')
+    # The lookbehind stops look-alike hosts such as 'notgithub.com' from
+    # matching; the repo group is greedy and may still end in '.git'.
+    GITHUB_URL_PATTERN = re.compile(
+        r'(?:https?://)?(?:www\.)?(?<![\w-])github\.com/'
+        r'([a-zA-Z0-9_.-]+)/([a-zA-Z0-9_.-]+)',
+        re.IGNORECASE,
+    )
+    # Leading characters that mark the input as a filesystem path.
+    _PATH_PREFIXES = ('/', '.', '~')
+    # Host labels dropped from the front of a domain when suggesting a name.
+    _NAME_PREFIXES = ('www.', 'docs.')
+
+    @classmethod
+    def detect(cls, source: str) -> SourceInfo:
+        """Detect source type and extract information.
+
+        Args:
+            source: User input (URL, path, repo, etc.)
+
+        Returns:
+            SourceInfo object with detected type and parsed data
+
+        Raises:
+            ValueError: If source type cannot be determined
+        """
+        # 1. File extension detection (case-insensitive: 'Guide.PDF' counts)
+        lowered = source.lower()
+        if lowered.endswith('.json'):
+            return cls._detect_config(source)
+
+        if lowered.endswith('.pdf'):
+            return cls._detect_pdf(source)
+
+        # 2. Directory detection
+        if os.path.isdir(source):
+            return cls._detect_local(source)
+
+        # 3. GitHub patterns
+        github_info = cls._detect_github(source)
+        if github_info:
+            return github_info
+
+        # 4. URL detection
+        if source.startswith(('http://', 'https://')):
+            return cls._detect_web(source)
+
+        # 5. Domain inference (add https://). Path-like input such as
+        #    './missing-dir' is never a domain, so it falls through to the error.
+        if '.' in source and not source.startswith(cls._PATH_PREFIXES):
+            return cls._detect_web(f'https://{source}')
+
+        # 6. Error - cannot determine
+        raise ValueError(
+            f"Cannot determine source type for: {source}\n\n"
+            "Examples:\n"
+            "  Web:    skill-seekers create https://docs.react.dev/\n"
+            "  GitHub: skill-seekers create facebook/react\n"
+            "  Local:  skill-seekers create ./my-project\n"
+            "  PDF:    skill-seekers create tutorial.pdf\n"
+            "  Config: skill-seekers create configs/react.json"
+        )
+
+    @classmethod
+    def _detect_config(cls, source: str) -> SourceInfo:
+        """Build a 'config' SourceInfo named after the file's stem."""
+        name = os.path.splitext(os.path.basename(source))[0]
+        return SourceInfo(
+            type='config', parsed={'config_path': source},
+            suggested_name=name, raw_input=source,
+        )
+
+    @classmethod
+    def _detect_pdf(cls, source: str) -> SourceInfo:
+        """Build a 'pdf' SourceInfo named after the file's stem."""
+        name = os.path.splitext(os.path.basename(source))[0]
+        return SourceInfo(
+            type='pdf', parsed={'file_path': source},
+            suggested_name=name, raw_input=source,
+        )
+
+    @classmethod
+    def _detect_local(cls, source: str) -> SourceInfo:
+        """Build a 'local' SourceInfo holding the directory's absolute path."""
+        # abspath also drops a trailing slash, so 'proj/' is still named 'proj'
+        directory = os.path.abspath(source)
+        name = os.path.basename(directory)
+
+        return SourceInfo(
+            type='local', parsed={'directory': directory},
+            suggested_name=name, raw_input=source,
+        )
+
+    @classmethod
+    def _detect_github(cls, source: str) -> Optional[SourceInfo]:
+        """Detect GitHub repository source.
+
+        Supports patterns:
+        - owner/repo
+        - github.com/owner/repo
+        - https://github.com/owner/repo
+
+        Returns:
+            A 'github' SourceInfo, or None when no pattern applies
+        """
+        match = cls.GITHUB_REPO_PATTERN.match(source)
+        if match:
+            # A leading '.' ('./proj', '../proj') or a '.'/'..' repo part means
+            # a relative path that just happens to fit the shorthand shape.
+            if source.startswith('.') or match.group(2) in ('.', '..'):
+                return None
+        else:
+            match = cls.GITHUB_URL_PATTERN.search(source)
+            if not match:
+                return None
+
+        owner, repo = match.groups()
+        # Clean up repo name (remove .git suffix if present)
+        if repo.endswith('.git'):
+            repo = repo[:-4]
+        return SourceInfo(
+            type='github', parsed={'repo': f'{owner}/{repo}'},
+            suggested_name=repo, raw_input=source,
+        )
+
+    @classmethod
+    def _detect_web(cls, source: str) -> SourceInfo:
+        """Detect web documentation source.
+
+        The suggested name is the first host label left after dropping leading
+        'www.' / 'docs.' labels, e.g. docs.react.dev -> react.
+        """
+        parsed_url = urlparse(source)
+        # hostname excludes credentials and port (localhost:8080 -> localhost)
+        domain = parsed_url.hostname or parsed_url.netloc or parsed_url.path
+
+        # Strip prefixes only at the start, so 'mydocs.io' stays 'mydocs'
+        while domain.startswith(cls._NAME_PREFIXES):
+            domain = domain.split('.', 1)[1]
+        name = domain.split('.')[0]  # Take first part before TLD
+
+        return SourceInfo(
+            type='web', parsed={'url': source},
+            suggested_name=name, raw_input=source,
+        )
+
+    @classmethod
+    def validate_source(cls, source_info: SourceInfo) -> None:
+        """Validate that source is accessible.
+
+        Args:
+            source_info: Detected source information
+
+        Raises:
+            ValueError: If a local, pdf or config path does not exist or is
+                not of the expected kind (directory vs file)
+        """
+        # type -> (key in parsed, label for messages, expected kind, kind check)
+        checks = {
+            'local': ('directory', 'Directory', 'directory', os.path.isdir),
+            'pdf': ('file_path', 'PDF file', 'file', os.path.isfile),
+            'config': ('config_path', 'Config file', 'file', os.path.isfile),
+        }
+        # For web and github, validation happens during scraping
+        # (URL accessibility, repo existence)
+        if source_info.type not in checks:
+            return
+
+        key, label, kind, is_kind = checks[source_info.type]
+        path = source_info.parsed[key]
+        if not os.path.exists(path):
+            raise ValueError(f"{label} does not exist: {path}")
+        if not is_kind(path):
+            raise ValueError(f"Path is not a {kind}: {path}")