run ruff

2026-01-17 17:29:21 +00:00
parent c89f059712
commit 5ed767ff9a
144 changed files with 14142 additions and 16488 deletions
--- a/src/skill_seekers/cli/conflict_detector.py
+++ b/src/skill_seekers/cli/conflict_detector.py
@@ -13,9 +13,9 @@ Used by unified scraper to identify discrepancies before merging.

 import json
 import logging
-from typing import Dict, List, Any, Optional, Tuple
-from dataclasses import dataclass, asdict
+from dataclasses import asdict, dataclass
 from difflib import SequenceMatcher
+from typing import Any

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -24,13 +24,14 @@ logger = logging.getLogger(__name__)
@dataclass
 class Conflict:
    """Represents a conflict between documentation and code."""
+
    type: str  # 'missing_in_docs', 'missing_in_code', 'signature_mismatch', 'description_mismatch'
    severity: str  # 'low', 'medium', 'high'
    api_name: str
-    docs_info: Optional[Dict[str, Any]] = None
-    code_info: Optional[Dict[str, Any]] = None
-    difference: Optional[str] = None
-    suggestion: Optional[str] = None
+    docs_info: dict[str, Any] | None = None
+    code_info: dict[str, Any] | None = None
+    difference: str | None = None
+    suggestion: str | None = None


 class ConflictDetector:
@@ -38,7 +39,7 @@ class ConflictDetector:
    Detects conflicts between documentation and code sources.
    """

-    def __init__(self, docs_data: Dict[str, Any], github_data: Dict[str, Any]):
+    def __init__(self, docs_data: dict[str, Any], github_data: dict[str, Any]):
        """
        Initialize conflict detector.

@@ -56,7 +57,7 @@ class ConflictDetector:
        logger.info(f"Loaded {len(self.docs_apis)} APIs from documentation")
        logger.info(f"Loaded {len(self.code_apis)} APIs from code")

-    def _extract_docs_apis(self) -> Dict[str, Dict[str, Any]]:
+    def _extract_docs_apis(self) -> dict[str, dict[str, Any]]:
        """
        Extract API information from documentation data.

@@ -66,42 +67,43 @@ class ConflictDetector:
        apis = {}

        # Documentation structure varies, but typically has 'pages' or 'references'
-        pages = self.docs_data.get('pages', {})
+        pages = self.docs_data.get("pages", {})

        # Handle both dict and list formats
        if isinstance(pages, dict):
            # Format: {url: page_data, ...}
            for url, page_data in pages.items():
-                content = page_data.get('content', '')
-                title = page_data.get('title', '')
+                content = page_data.get("content", "")
+                title = page_data.get("title", "")

                # Simple heuristic: if title or URL contains "api", "reference", "class", "function"
                # it might be an API page
-                if any(keyword in title.lower() or keyword in url.lower()
-                       for keyword in ['api', 'reference', 'class', 'function', 'method']):
-
+                if any(
+                    keyword in title.lower() or keyword in url.lower()
+                    for keyword in ["api", "reference", "class", "function", "method"]
+                ):
                    # Extract API signatures from content (simplified)
                    extracted_apis = self._parse_doc_content_for_apis(content, url)
                    apis.update(extracted_apis)
        elif isinstance(pages, list):
            # Format: [{url: '...', apis: [...]}, ...]
            for page in pages:
-                url = page.get('url', '')
-                page_apis = page.get('apis', [])
+                url = page.get("url", "")
+                page_apis = page.get("apis", [])

                # If APIs are already extracted in the page data
                for api in page_apis:
-                    api_name = api.get('name', '')
+                    api_name = api.get("name", "")
                    if api_name:
                        apis[api_name] = {
-                            'parameters': api.get('parameters', []),
-                            'return_type': api.get('return_type', 'Any'),
-                            'source_url': url
+                            "parameters": api.get("parameters", []),
+                            "return_type": api.get("return_type", "Any"),
+                            "source_url": url,
                        }

        return apis

-    def _parse_doc_content_for_apis(self, content: str, source_url: str) -> Dict[str, Dict]:
+    def _parse_doc_content_for_apis(self, content: str, source_url: str) -> dict[str, dict]:
        """
        Parse documentation content to extract API signatures.

@@ -121,13 +123,13 @@ class ConflictDetector:
        # Pattern for common API signatures
        patterns = [
            # Python style: def name(params) -> return
-            r'def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*(\w+))?',
+            r"def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*(\w+))?",
            # JavaScript style: function name(params)
-            r'function\s+(\w+)\s*\(([^)]*)\)',
+            r"function\s+(\w+)\s*\(([^)]*)\)",
            # C++ style: return_type name(params)
-            r'(\w+)\s+(\w+)\s*\(([^)]*)\)',
+            r"(\w+)\s+(\w+)\s*\(([^)]*)\)",
            # Method style: ClassName.method_name(params)
-            r'(\w+)\.(\w+)\s*\(([^)]*)\)'
+            r"(\w+)\.(\w+)\s*\(([^)]*)\)",
        ]

        for pattern in patterns:
@@ -135,17 +137,17 @@ class ConflictDetector:
                groups = match.groups()

                # Parse based on pattern matched
-                if 'def' in pattern:
+                if "def" in pattern:
                    # Python function
                    name = groups[0]
                    params_str = groups[1]
                    return_type = groups[2] if len(groups) > 2 else None
-                elif 'function' in pattern:
+                elif "function" in pattern:
                    # JavaScript function
                    name = groups[0]
                    params_str = groups[1]
                    return_type = None
-                elif '.' in pattern:
+                elif "." in pattern:
                    # Class method
                    class_name = groups[0]
                    method_name = groups[1]
@@ -162,54 +164,54 @@ class ConflictDetector:
                params = self._parse_param_string(params_str)

                apis[name] = {
-                    'name': name,
-                    'parameters': params,
-                    'return_type': return_type,
-                    'source': source_url,
-                    'raw_signature': match.group(0)
+                    "name": name,
+                    "parameters": params,
+                    "return_type": return_type,
+                    "source": source_url,
+                    "raw_signature": match.group(0),
                }

        return apis

-    def _parse_param_string(self, params_str: str) -> List[Dict]:
+    def _parse_param_string(self, params_str: str) -> list[dict]:
        """Parse parameter string into list of parameter dicts."""
        if not params_str.strip():
            return []

        params = []
-        for param in params_str.split(','):
+        for param in params_str.split(","):
            param = param.strip()
            if not param:
                continue

            # Try to extract name and type
-            param_info = {'name': param, 'type': None, 'default': None}
+            param_info = {"name": param, "type": None, "default": None}

            # Check for type annotation (: type)
-            if ':' in param:
-                parts = param.split(':', 1)
-                param_info['name'] = parts[0].strip()
+            if ":" in param:
+                parts = param.split(":", 1)
+                param_info["name"] = parts[0].strip()
                type_part = parts[1].strip()

                # Check for default value (= value)
-                if '=' in type_part:
-                    type_str, default_str = type_part.split('=', 1)
-                    param_info['type'] = type_str.strip()
-                    param_info['default'] = default_str.strip()
+                if "=" in type_part:
+                    type_str, default_str = type_part.split("=", 1)
+                    param_info["type"] = type_str.strip()
+                    param_info["default"] = default_str.strip()
                else:
-                    param_info['type'] = type_part
+                    param_info["type"] = type_part

            # Check for default without type (= value)
-            elif '=' in param:
-                parts = param.split('=', 1)
-                param_info['name'] = parts[0].strip()
-                param_info['default'] = parts[1].strip()
+            elif "=" in param:
+                parts = param.split("=", 1)
+                param_info["name"] = parts[0].strip()
+                param_info["default"] = parts[1].strip()

            params.append(param_info)

        return params

-    def _extract_code_apis(self) -> Dict[str, Dict[str, Any]]:
+    def _extract_code_apis(self) -> dict[str, dict[str, Any]]:
        """
        Extract API information from GitHub code analysis.

@@ -218,61 +220,61 @@ class ConflictDetector:
        """
        apis = {}

-        code_analysis = self.github_data.get('code_analysis', {})
+        code_analysis = self.github_data.get("code_analysis", {})
        if not code_analysis:
            return apis

        # Support both 'files' and 'analyzed_files' keys
-        files = code_analysis.get('files', code_analysis.get('analyzed_files', []))
+        files = code_analysis.get("files", code_analysis.get("analyzed_files", []))

        for file_info in files:
-            file_path = file_info.get('file', 'unknown')
+            file_path = file_info.get("file", "unknown")

            # Extract classes and their methods
-            for class_info in file_info.get('classes', []):
-                class_name = class_info['name']
+            for class_info in file_info.get("classes", []):
+                class_name = class_info["name"]

                # Add class itself
                apis[class_name] = {
-                    'name': class_name,
-                    'type': 'class',
-                    'source': file_path,
-                    'line': class_info.get('line_number'),
-                    'base_classes': class_info.get('base_classes', []),
-                    'docstring': class_info.get('docstring')
+                    "name": class_name,
+                    "type": "class",
+                    "source": file_path,
+                    "line": class_info.get("line_number"),
+                    "base_classes": class_info.get("base_classes", []),
+                    "docstring": class_info.get("docstring"),
                }

                # Add methods
-                for method in class_info.get('methods', []):
+                for method in class_info.get("methods", []):
                    method_name = f"{class_name}.{method['name']}"
                    apis[method_name] = {
-                        'name': method_name,
-                        'type': 'method',
-                        'parameters': method.get('parameters', []),
-                        'return_type': method.get('return_type'),
-                        'source': file_path,
-                        'line': method.get('line_number'),
-                        'docstring': method.get('docstring'),
-                        'is_async': method.get('is_async', False)
+                        "name": method_name,
+                        "type": "method",
+                        "parameters": method.get("parameters", []),
+                        "return_type": method.get("return_type"),
+                        "source": file_path,
+                        "line": method.get("line_number"),
+                        "docstring": method.get("docstring"),
+                        "is_async": method.get("is_async", False),
                    }

            # Extract standalone functions
-            for func_info in file_info.get('functions', []):
-                func_name = func_info['name']
+            for func_info in file_info.get("functions", []):
+                func_name = func_info["name"]
                apis[func_name] = {
-                    'name': func_name,
-                    'type': 'function',
-                    'parameters': func_info.get('parameters', []),
-                    'return_type': func_info.get('return_type'),
-                    'source': file_path,
-                    'line': func_info.get('line_number'),
-                    'docstring': func_info.get('docstring'),
-                    'is_async': func_info.get('is_async', False)
+                    "name": func_name,
+                    "type": "function",
+                    "parameters": func_info.get("parameters", []),
+                    "return_type": func_info.get("return_type"),
+                    "source": file_path,
+                    "line": func_info.get("line_number"),
+                    "docstring": func_info.get("docstring"),
+                    "is_async": func_info.get("is_async", False),
                }

        return apis

-    def detect_all_conflicts(self) -> List[Conflict]:
+    def detect_all_conflicts(self) -> list[Conflict]:
        """
        Detect all types of conflicts.

@@ -296,7 +298,7 @@ class ConflictDetector:

        return conflicts

-    def _find_missing_in_docs(self) -> List[Conflict]:
+    def _find_missing_in_docs(self) -> list[Conflict]:
        """Find APIs that exist in code but not in documentation."""
        conflicts = []

@@ -304,40 +306,46 @@ class ConflictDetector:
            # Simple name matching (can be enhanced with fuzzy matching)
            if api_name not in self.docs_apis:
                # Check if it's a private/internal API (often not documented)
-                is_private = api_name.startswith('_') or '__' in api_name
-                severity = 'low' if is_private else 'medium'
+                is_private = api_name.startswith("_") or "__" in api_name
+                severity = "low" if is_private else "medium"

-                conflicts.append(Conflict(
-                    type='missing_in_docs',
-                    severity=severity,
-                    api_name=api_name,
-                    code_info=code_info,
-                    difference=f"API exists in code ({code_info['source']}) but not found in documentation",
-                    suggestion="Add documentation for this API" if not is_private else "Consider if this internal API should be documented"
-                ))
+                conflicts.append(
+                    Conflict(
+                        type="missing_in_docs",
+                        severity=severity,
+                        api_name=api_name,
+                        code_info=code_info,
+                        difference=f"API exists in code ({code_info['source']}) but not found in documentation",
+                        suggestion="Add documentation for this API"
+                        if not is_private
+                        else "Consider if this internal API should be documented",
+                    )
+                )

        logger.info(f"Found {len(conflicts)} APIs missing in documentation")
        return conflicts

-    def _find_missing_in_code(self) -> List[Conflict]:
+    def _find_missing_in_code(self) -> list[Conflict]:
        """Find APIs that are documented but don't exist in code."""
        conflicts = []

        for api_name, docs_info in self.docs_apis.items():
            if api_name not in self.code_apis:
-                conflicts.append(Conflict(
-                    type='missing_in_code',
-                    severity='high',  # This is serious - documented but doesn't exist
-                    api_name=api_name,
-                    docs_info=docs_info,
-                    difference=f"API documented ({docs_info.get('source', 'unknown')}) but not found in code",
-                    suggestion="Update documentation to remove this API, or add it to codebase"
-                ))
+                conflicts.append(
+                    Conflict(
+                        type="missing_in_code",
+                        severity="high",  # This is serious - documented but doesn't exist
+                        api_name=api_name,
+                        docs_info=docs_info,
+                        difference=f"API documented ({docs_info.get('source', 'unknown')}) but not found in code",
+                        suggestion="Update documentation to remove this API, or add it to codebase",
+                    )
+                )

        logger.info(f"Found {len(conflicts)} APIs missing in code")
        return conflicts

-    def _find_signature_mismatches(self) -> List[Conflict]:
+    def _find_signature_mismatches(self) -> list[Conflict]:
        """Find APIs where signature differs between docs and code."""
        conflicts = []

@@ -352,41 +360,43 @@ class ConflictDetector:
            mismatch = self._compare_signatures(docs_info, code_info)

            if mismatch:
-                conflicts.append(Conflict(
-                    type='signature_mismatch',
-                    severity=mismatch['severity'],
-                    api_name=api_name,
-                    docs_info=docs_info,
-                    code_info=code_info,
-                    difference=mismatch['difference'],
-                    suggestion=mismatch['suggestion']
-                ))
+                conflicts.append(
+                    Conflict(
+                        type="signature_mismatch",
+                        severity=mismatch["severity"],
+                        api_name=api_name,
+                        docs_info=docs_info,
+                        code_info=code_info,
+                        difference=mismatch["difference"],
+                        suggestion=mismatch["suggestion"],
+                    )
+                )

        logger.info(f"Found {len(conflicts)} signature mismatches")
        return conflicts

-    def _compare_signatures(self, docs_info: Dict, code_info: Dict) -> Optional[Dict]:
+    def _compare_signatures(self, docs_info: dict, code_info: dict) -> dict | None:
        """
        Compare signatures between docs and code.

        Returns:
            Dict with mismatch details if conflict found, None otherwise
        """
-        docs_params = docs_info.get('parameters', [])
-        code_params = code_info.get('parameters', [])
+        docs_params = docs_info.get("parameters", [])
+        code_params = code_info.get("parameters", [])

        # Compare parameter counts
        if len(docs_params) != len(code_params):
            return {
-                'severity': 'medium',
-                'difference': f"Parameter count mismatch: docs has {len(docs_params)}, code has {len(code_params)}",
-                'suggestion': f"Documentation shows {len(docs_params)} parameters, but code has {len(code_params)}"
+                "severity": "medium",
+                "difference": f"Parameter count mismatch: docs has {len(docs_params)}, code has {len(code_params)}",
+                "suggestion": f"Documentation shows {len(docs_params)} parameters, but code has {len(code_params)}",
            }

        # Compare parameter names and types
        for i, (doc_param, code_param) in enumerate(zip(docs_params, code_params)):
-            doc_name = doc_param.get('name', '')
-            code_name = code_param.get('name', '')
+            doc_name = doc_param.get("name", "")
+            code_name = code_param.get("name", "")

            # Parameter name mismatch
            if doc_name != code_name:
@@ -394,36 +404,36 @@ class ConflictDetector:
                similarity = SequenceMatcher(None, doc_name, code_name).ratio()
                if similarity < 0.8:  # Not similar enough
                    return {
-                        'severity': 'medium',
-                        'difference': f"Parameter {i+1} name mismatch: '{doc_name}' in docs vs '{code_name}' in code",
-                        'suggestion': f"Update documentation to use parameter name '{code_name}'"
+                        "severity": "medium",
+                        "difference": f"Parameter {i + 1} name mismatch: '{doc_name}' in docs vs '{code_name}' in code",
+                        "suggestion": f"Update documentation to use parameter name '{code_name}'",
                    }

            # Type mismatch
-            doc_type = doc_param.get('type')
-            code_type = code_param.get('type_hint')
+            doc_type = doc_param.get("type")
+            code_type = code_param.get("type_hint")

            if doc_type and code_type and doc_type != code_type:
                return {
-                    'severity': 'low',
-                    'difference': f"Parameter '{doc_name}' type mismatch: '{doc_type}' in docs vs '{code_type}' in code",
-                    'suggestion': f"Verify correct type for parameter '{doc_name}'"
+                    "severity": "low",
+                    "difference": f"Parameter '{doc_name}' type mismatch: '{doc_type}' in docs vs '{code_type}' in code",
+                    "suggestion": f"Verify correct type for parameter '{doc_name}'",
                }

        # Compare return types if both have them
-        docs_return = docs_info.get('return_type')
-        code_return = code_info.get('return_type')
+        docs_return = docs_info.get("return_type")
+        code_return = code_info.get("return_type")

        if docs_return and code_return and docs_return != code_return:
            return {
-                'severity': 'low',
-                'difference': f"Return type mismatch: '{docs_return}' in docs vs '{code_return}' in code",
-                'suggestion': "Verify correct return type"
+                "severity": "low",
+                "difference": f"Return type mismatch: '{docs_return}' in docs vs '{code_return}' in code",
+                "suggestion": "Verify correct return type",
            }

        return None

-    def generate_summary(self, conflicts: List[Conflict]) -> Dict[str, Any]:
+    def generate_summary(self, conflicts: list[Conflict]) -> dict[str, Any]:
        """
        Generate summary statistics for conflicts.

@@ -434,25 +444,25 @@ class ConflictDetector:
            Summary dict with statistics
        """
        summary = {
-            'total': len(conflicts),
-            'by_type': {},
-            'by_severity': {},
-            'apis_affected': len(set(c.api_name for c in conflicts))
+            "total": len(conflicts),
+            "by_type": {},
+            "by_severity": {},
+            "apis_affected": len(set(c.api_name for c in conflicts)),
        }

        # Count by type
-        for conflict_type in ['missing_in_docs', 'missing_in_code', 'signature_mismatch', 'description_mismatch']:
+        for conflict_type in ["missing_in_docs", "missing_in_code", "signature_mismatch", "description_mismatch"]:
            count = sum(1 for c in conflicts if c.type == conflict_type)
-            summary['by_type'][conflict_type] = count
+            summary["by_type"][conflict_type] = count

        # Count by severity
-        for severity in ['low', 'medium', 'high']:
+        for severity in ["low", "medium", "high"]:
            count = sum(1 for c in conflicts if c.severity == severity)
-            summary['by_severity'][severity] = count
+            summary["by_severity"][severity] = count

        return summary

-    def save_conflicts(self, conflicts: List[Conflict], output_path: str):
+    def save_conflicts(self, conflicts: list[Conflict], output_path: str):
        """
        Save conflicts to JSON file.

@@ -460,18 +470,15 @@ class ConflictDetector:
            conflicts: List of Conflict objects
            output_path: Path to output JSON file
        """
-        data = {
-            'conflicts': [asdict(c) for c in conflicts],
-            'summary': self.generate_summary(conflicts)
-        }
+        data = {"conflicts": [asdict(c) for c in conflicts], "summary": self.generate_summary(conflicts)}

-        with open(output_path, 'w', encoding='utf-8') as f:
+        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        logger.info(f"Conflicts saved to: {output_path}")


-if __name__ == '__main__':
+if __name__ == "__main__":
    import sys

    if len(sys.argv) < 3:
@@ -482,10 +489,10 @@ if __name__ == '__main__':
    github_file = sys.argv[2]

    # Load data
-    with open(docs_file, 'r') as f:
+    with open(docs_file) as f:
        docs_data = json.load(f)

-    with open(github_file, 'r') as f:
+    with open(github_file) as f:
        github_data = json.load(f)

    # Detect conflicts
@@ -498,16 +505,16 @@ if __name__ == '__main__':
    print(f"   Total conflicts: {summary['total']}")
    print(f"   APIs affected: {summary['apis_affected']}")
    print("\n   By Type:")
-    for conflict_type, count in summary['by_type'].items():
+    for conflict_type, count in summary["by_type"].items():
        if count > 0:
            print(f"     {conflict_type}: {count}")
    print("\n   By Severity:")
-    for severity, count in summary['by_severity'].items():
+    for severity, count in summary["by_severity"].items():
        if count > 0:
-            emoji = '🔴' if severity == 'high' else '🟡' if severity == 'medium' else '🟢'
+            emoji = "🔴" if severity == "high" else "🟡" if severity == "medium" else "🟢"
            print(f"     {emoji} {severity}: {count}")

    # Save to file
-    output_file = 'conflicts.json'
+    output_file = "conflicts.json"
    detector.save_conflicts(conflicts, output_file)
    print(f"\n✅ Full report saved to: {output_file}")