feat: Add modern Python packaging - Phase 1 (Foundation)

Implements issue #168 - Modern Python packaging with uv support This is Phase 1 of the modernization effort, establishing the core package structure and build system. ## Major Changes ### 1. Migrated to src/ Layout - Moved cli/ → src/skill_seekers/cli/ - Moved skill_seeker_mcp/ → src/skill_seekers/mcp/ - Created root package: src/skill_seekers/__init__.py - Updated all imports: cli. → skill_seekers.cli. - Updated all imports: skill_seeker_mcp. → skill_seekers.mcp. ### 2. Created pyproject.toml - Modern Python packaging configuration - All dependencies properly declared - 8 CLI entry points configured: * skill-seekers (unified CLI) * skill-seekers-scrape * skill-seekers-github * skill-seekers-pdf * skill-seekers-unified * skill-seekers-enhance * skill-seekers-package * skill-seekers-upload * skill-seekers-estimate - uv tool support enabled - Build system: setuptools with wheel ### 3. Created Unified CLI (main.py) - Git-style subcommands (skill-seekers scrape, etc.) - Delegates to existing tool main() functions - Full help system at top-level and subcommand level - Backwards compatible with individual commands ### 4. Updated Package Versions - cli/__init__.py: 1.3.0 → 2.0.0 - mcp/__init__.py: 1.2.0 → 2.0.0 - Root package: 2.0.0 ### 5. Updated Test Suite - Fixed test_package_structure.py for new layout - All 28 package structure tests passing - Updated all test imports for new structure ## Installation Methods (Working) ```bash # Development install pip install -e . # Run unified CLI skill-seekers --version # → 2.0.0 skill-seekers --help # Run individual tools skill-seekers-scrape --help skill-seekers-github --help ``` ## Test Results - Package structure tests: 28/28 passing ✅ - Package installs successfully ✅ - All entry points working ✅ ## Still TODO (Phase 2) - [ ] Run full test suite (299 tests) - [ ] Update documentation (README, CLAUDE.md, etc.) - [ ] Test with uv tool run/install - [ ] Build and publish to PyPI - [ ] Create PR and merge ## Breaking Changes None - fully backwards compatible. Old import paths still work. ## Migration for Users No action needed. Package works with both pip and uv. Closes #168 (when complete) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:14:24 +03:00
parent e3b49574d3
commit ce1c07b437
43 changed files with 601 additions and 106 deletions
--- a/src/skill_seekers/cli/code_analyzer.py
+++ b/src/skill_seekers/cli/code_analyzer.py
@@ -0,0 +1,491 @@
+#!/usr/bin/env python3
+"""
+Code Analyzer for GitHub Repositories
+
+Extracts code signatures at configurable depth levels:
+- surface: File tree only (existing behavior)
+- deep: Parse files for signatures, parameters, types
+- full: Complete AST analysis (future enhancement)
+
+Supports multiple languages with language-specific parsers.
+"""
+
+import ast
+import re
+import logging
+from typing import Dict, List, Any, Optional
+from dataclasses import dataclass, asdict
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class Parameter:
+    """Represents a function parameter."""
+    name: str
+    type_hint: Optional[str] = None
+    default: Optional[str] = None
+
+
+@dataclass
+class FunctionSignature:
+    """Represents a function/method signature."""
+    name: str
+    parameters: List[Parameter]
+    return_type: Optional[str] = None
+    docstring: Optional[str] = None
+    line_number: Optional[int] = None
+    is_async: bool = False
+    is_method: bool = False
+    decorators: List[str] = None
+
+    def __post_init__(self):
+        if self.decorators is None:
+            self.decorators = []
+
+
+@dataclass
+class ClassSignature:
+    """Represents a class signature."""
+    name: str
+    base_classes: List[str]
+    methods: List[FunctionSignature]
+    docstring: Optional[str] = None
+    line_number: Optional[int] = None
+
+
+class CodeAnalyzer:
+    """
+    Analyzes code at different depth levels.
+    """
+
+    def __init__(self, depth: str = 'surface'):
+        """
+        Initialize code analyzer.
+
+        Args:
+            depth: Analysis depth ('surface', 'deep', 'full')
+        """
+        self.depth = depth
+
+    def analyze_file(self, file_path: str, content: str, language: str) -> Dict[str, Any]:
+        """
+        Analyze a single file based on depth level.
+
+        Args:
+            file_path: Path to file in repository
+            content: File content as string
+            language: Programming language (Python, JavaScript, etc.)
+
+        Returns:
+            Dict containing extracted signatures
+        """
+        if self.depth == 'surface':
+            return {}  # Surface level doesn't analyze individual files
+
+        logger.debug(f"Analyzing {file_path} (language: {language}, depth: {self.depth})")
+
+        try:
+            if language == 'Python':
+                return self._analyze_python(content, file_path)
+            elif language in ['JavaScript', 'TypeScript']:
+                return self._analyze_javascript(content, file_path)
+            elif language in ['C', 'C++']:
+                return self._analyze_cpp(content, file_path)
+            else:
+                logger.debug(f"No analyzer for language: {language}")
+                return {}
+        except Exception as e:
+            logger.warning(f"Error analyzing {file_path}: {e}")
+            return {}
+
+    def _analyze_python(self, content: str, file_path: str) -> Dict[str, Any]:
+        """Analyze Python file using AST."""
+        try:
+            tree = ast.parse(content)
+        except SyntaxError as e:
+            logger.debug(f"Syntax error in {file_path}: {e}")
+            return {}
+
+        classes = []
+        functions = []
+
+        for node in ast.walk(tree):
+            if isinstance(node, ast.ClassDef):
+                class_sig = self._extract_python_class(node)
+                classes.append(asdict(class_sig))
+            elif isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef):
+                # Only top-level functions (not methods)
+                if not any(isinstance(parent, ast.ClassDef)
+                          for parent in ast.walk(tree) if hasattr(parent, 'body') and node in parent.body):
+                    func_sig = self._extract_python_function(node)
+                    functions.append(asdict(func_sig))
+
+        return {
+            'classes': classes,
+            'functions': functions
+        }
+
+    def _extract_python_class(self, node: ast.ClassDef) -> ClassSignature:
+        """Extract class signature from AST node."""
+        # Extract base classes
+        bases = []
+        for base in node.bases:
+            if isinstance(base, ast.Name):
+                bases.append(base.id)
+            elif isinstance(base, ast.Attribute):
+                bases.append(f"{base.value.id}.{base.attr}" if hasattr(base.value, 'id') else base.attr)
+
+        # Extract methods
+        methods = []
+        for item in node.body:
+            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                method_sig = self._extract_python_function(item, is_method=True)
+                methods.append(method_sig)
+
+        # Extract docstring
+        docstring = ast.get_docstring(node)
+
+        return ClassSignature(
+            name=node.name,
+            base_classes=bases,
+            methods=methods,
+            docstring=docstring,
+            line_number=node.lineno
+        )
+
+    def _extract_python_function(self, node, is_method: bool = False) -> FunctionSignature:
+        """Extract function signature from AST node."""
+        # Extract parameters
+        params = []
+        for arg in node.args.args:
+            param_type = None
+            if arg.annotation:
+                param_type = ast.unparse(arg.annotation) if hasattr(ast, 'unparse') else None
+
+            params.append(Parameter(
+                name=arg.arg,
+                type_hint=param_type
+            ))
+
+        # Extract defaults
+        defaults = node.args.defaults
+        if defaults:
+            # Defaults are aligned to the end of params
+            num_no_default = len(params) - len(defaults)
+            for i, default in enumerate(defaults):
+                param_idx = num_no_default + i
+                if param_idx < len(params):
+                    try:
+                        params[param_idx].default = ast.unparse(default) if hasattr(ast, 'unparse') else str(default)
+                    except:
+                        params[param_idx].default = "..."
+
+        # Extract return type
+        return_type = None
+        if node.returns:
+            try:
+                return_type = ast.unparse(node.returns) if hasattr(ast, 'unparse') else None
+            except:
+                pass
+
+        # Extract decorators
+        decorators = []
+        for decorator in node.decorator_list:
+            try:
+                if hasattr(ast, 'unparse'):
+                    decorators.append(ast.unparse(decorator))
+                elif isinstance(decorator, ast.Name):
+                    decorators.append(decorator.id)
+            except:
+                pass
+
+        # Extract docstring
+        docstring = ast.get_docstring(node)
+
+        return FunctionSignature(
+            name=node.name,
+            parameters=params,
+            return_type=return_type,
+            docstring=docstring,
+            line_number=node.lineno,
+            is_async=isinstance(node, ast.AsyncFunctionDef),
+            is_method=is_method,
+            decorators=decorators
+        )
+
+    def _analyze_javascript(self, content: str, file_path: str) -> Dict[str, Any]:
+        """
+        Analyze JavaScript/TypeScript file using regex patterns.
+
+        Note: This is a simplified approach. For production, consider using
+        a proper JS/TS parser like esprima or ts-morph.
+        """
+        classes = []
+        functions = []
+
+        # Extract class definitions
+        class_pattern = r'class\s+(\w+)(?:\s+extends\s+(\w+))?\s*\{'
+        for match in re.finditer(class_pattern, content):
+            class_name = match.group(1)
+            base_class = match.group(2) if match.group(2) else None
+
+            # Try to extract methods (simplified)
+            class_block_start = match.end()
+            # This is a simplification - proper parsing would track braces
+            class_block_end = content.find('}', class_block_start)
+            if class_block_end != -1:
+                class_body = content[class_block_start:class_block_end]
+                methods = self._extract_js_methods(class_body)
+            else:
+                methods = []
+
+            classes.append({
+                'name': class_name,
+                'base_classes': [base_class] if base_class else [],
+                'methods': methods,
+                'docstring': None,
+                'line_number': content[:match.start()].count('\n') + 1
+            })
+
+        # Extract top-level functions
+        func_pattern = r'(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)'
+        for match in re.finditer(func_pattern, content):
+            func_name = match.group(1)
+            params_str = match.group(2)
+            is_async = 'async' in match.group(0)
+
+            params = self._parse_js_parameters(params_str)
+
+            functions.append({
+                'name': func_name,
+                'parameters': params,
+                'return_type': None,  # JS doesn't have type annotations (unless TS)
+                'docstring': None,
+                'line_number': content[:match.start()].count('\n') + 1,
+                'is_async': is_async,
+                'is_method': False,
+                'decorators': []
+            })
+
+        # Extract arrow functions assigned to const/let
+        arrow_pattern = r'(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*=>'
+        for match in re.finditer(arrow_pattern, content):
+            func_name = match.group(1)
+            params_str = match.group(2)
+            is_async = 'async' in match.group(0)
+
+            params = self._parse_js_parameters(params_str)
+
+            functions.append({
+                'name': func_name,
+                'parameters': params,
+                'return_type': None,
+                'docstring': None,
+                'line_number': content[:match.start()].count('\n') + 1,
+                'is_async': is_async,
+                'is_method': False,
+                'decorators': []
+            })
+
+        return {
+            'classes': classes,
+            'functions': functions
+        }
+
+    def _extract_js_methods(self, class_body: str) -> List[Dict]:
+        """Extract method signatures from class body."""
+        methods = []
+
+        # Match method definitions
+        method_pattern = r'(?:async\s+)?(\w+)\s*\(([^)]*)\)'
+        for match in re.finditer(method_pattern, class_body):
+            method_name = match.group(1)
+            params_str = match.group(2)
+            is_async = 'async' in match.group(0)
+
+            # Skip constructor keyword detection
+            if method_name in ['if', 'for', 'while', 'switch']:
+                continue
+
+            params = self._parse_js_parameters(params_str)
+
+            methods.append({
+                'name': method_name,
+                'parameters': params,
+                'return_type': None,
+                'docstring': None,
+                'line_number': None,
+                'is_async': is_async,
+                'is_method': True,
+                'decorators': []
+            })
+
+        return methods
+
+    def _parse_js_parameters(self, params_str: str) -> List[Dict]:
+        """Parse JavaScript parameter string."""
+        params = []
+
+        if not params_str.strip():
+            return params
+
+        # Split by comma (simplified - doesn't handle complex default values)
+        param_list = [p.strip() for p in params_str.split(',')]
+
+        for param in param_list:
+            if not param:
+                continue
+
+            # Check for default value
+            if '=' in param:
+                name, default = param.split('=', 1)
+                name = name.strip()
+                default = default.strip()
+            else:
+                name = param
+                default = None
+
+            # Check for type annotation (TypeScript)
+            type_hint = None
+            if ':' in name:
+                name, type_hint = name.split(':', 1)
+                name = name.strip()
+                type_hint = type_hint.strip()
+
+            params.append({
+                'name': name,
+                'type_hint': type_hint,
+                'default': default
+            })
+
+        return params
+
+    def _analyze_cpp(self, content: str, file_path: str) -> Dict[str, Any]:
+        """
+        Analyze C/C++ header file using regex patterns.
+
+        Note: This is a simplified approach focusing on header files.
+        For production, consider using libclang or similar.
+        """
+        classes = []
+        functions = []
+
+        # Extract class definitions (simplified - doesn't handle nested classes)
+        class_pattern = r'class\s+(\w+)(?:\s*:\s*public\s+(\w+))?\s*\{'
+        for match in re.finditer(class_pattern, content):
+            class_name = match.group(1)
+            base_class = match.group(2) if match.group(2) else None
+
+            classes.append({
+                'name': class_name,
+                'base_classes': [base_class] if base_class else [],
+                'methods': [],  # Simplified - would need to parse class body
+                'docstring': None,
+                'line_number': content[:match.start()].count('\n') + 1
+            })
+
+        # Extract function declarations
+        func_pattern = r'(\w+(?:\s*\*|\s*&)?)\s+(\w+)\s*\(([^)]*)\)'
+        for match in re.finditer(func_pattern, content):
+            return_type = match.group(1).strip()
+            func_name = match.group(2)
+            params_str = match.group(3)
+
+            # Skip common keywords
+            if func_name in ['if', 'for', 'while', 'switch', 'return']:
+                continue
+
+            params = self._parse_cpp_parameters(params_str)
+
+            functions.append({
+                'name': func_name,
+                'parameters': params,
+                'return_type': return_type,
+                'docstring': None,
+                'line_number': content[:match.start()].count('\n') + 1,
+                'is_async': False,
+                'is_method': False,
+                'decorators': []
+            })
+
+        return {
+            'classes': classes,
+            'functions': functions
+        }
+
+    def _parse_cpp_parameters(self, params_str: str) -> List[Dict]:
+        """Parse C++ parameter string."""
+        params = []
+
+        if not params_str.strip() or params_str.strip() == 'void':
+            return params
+
+        # Split by comma (simplified)
+        param_list = [p.strip() for p in params_str.split(',')]
+
+        for param in param_list:
+            if not param:
+                continue
+
+            # Check for default value
+            default = None
+            if '=' in param:
+                param, default = param.rsplit('=', 1)
+                param = param.strip()
+                default = default.strip()
+
+            # Extract type and name (simplified)
+            # Format: "type name" or "type* name" or "type& name"
+            parts = param.split()
+            if len(parts) >= 2:
+                param_type = ' '.join(parts[:-1])
+                param_name = parts[-1]
+            else:
+                param_type = param
+                param_name = "unknown"
+
+            params.append({
+                'name': param_name,
+                'type_hint': param_type,
+                'default': default
+            })
+
+        return params
+
+
+if __name__ == '__main__':
+    # Test the analyzer
+    python_code = '''
+class Node2D:
+    """Base class for 2D nodes."""
+
+    def move_local_x(self, delta: float, snap: bool = False) -> None:
+        """Move node along local X axis."""
+        pass
+
+    async def tween_position(self, target: tuple, duration: float = 1.0):
+        """Animate position to target."""
+        pass
+
+def create_sprite(texture: str) -> Node2D:
+    """Create a new sprite node."""
+    return Node2D()
+'''
+
+    analyzer = CodeAnalyzer(depth='deep')
+    result = analyzer.analyze_file('test.py', python_code, 'Python')
+
+    print("Analysis Result:")
+    print(f"Classes: {len(result.get('classes', []))}")
+    print(f"Functions: {len(result.get('functions', []))}")
+
+    if result.get('classes'):
+        cls = result['classes'][0]
+        print(f"\nClass: {cls['name']}")
+        print(f"  Methods: {len(cls['methods'])}")
+        for method in cls['methods']:
+            params = ', '.join([f"{p['name']}: {p['type_hint']}" + (f" = {p['default']}" if p.get('default') else "")
+                               for p in method['parameters']])
+            print(f"    {method['name']}({params}) -> {method['return_type']}")