This commit is contained in:
Pablo Estevez
2026-01-17 17:29:21 +00:00
parent c89f059712
commit 5ed767ff9a
144 changed files with 14142 additions and 16488 deletions

View File

@@ -27,19 +27,18 @@ Example usage:
python test_example_extractor.py tests/ --min-confidence 0.7
"""
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Optional, Literal, Set
from pathlib import Path
import ast
import re
import hashlib
import logging
import argparse
import ast
import hashlib
import json
import sys
import logging
import re
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Literal
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)
@@ -47,22 +46,23 @@ logger = logging.getLogger(__name__)
# DATA MODELS
# ============================================================================
@dataclass
class TestExample:
"""Single extracted usage example from test code"""
# Identity
example_id: str # Unique hash of example
test_name: str # Test function/method name
example_id: str # Unique hash of example
test_name: str # Test function/method name
category: Literal["instantiation", "method_call", "config", "setup", "workflow"]
# Code
code: str # Actual example code
language: str # Programming language
code: str # Actual example code
language: str # Programming language
# Context
description: str # What this demonstrates
expected_behavior: str # Expected outcome from assertions
description: str # What this demonstrates
expected_behavior: str # Expected outcome from assertions
# Source
file_path: str
@@ -71,13 +71,13 @@ class TestExample:
# Quality
complexity_score: float # 0-1 scale (higher = more complex/valuable)
confidence: float # 0-1 scale (higher = more confident extraction)
confidence: float # 0-1 scale (higher = more confident extraction)
# Optional fields (must come after required fields)
setup_code: Optional[str] = None # Required setup code
tags: List[str] = field(default_factory=list) # ["pytest", "mock", "async"]
dependencies: List[str] = field(default_factory=list) # Imported modules
ai_analysis: Optional[Dict] = None # AI-generated analysis (C3.6)
setup_code: str | None = None # Required setup code
tags: list[str] = field(default_factory=list) # ["pytest", "mock", "async"]
dependencies: list[str] = field(default_factory=list) # Imported modules
ai_analysis: dict | None = None # AI-generated analysis (C3.6)
def to_dict(self) -> dict:
"""Convert to dictionary for JSON serialization"""
@@ -96,12 +96,12 @@ class TestExample:
# Add AI analysis if available (C3.6)
if self.ai_analysis:
md += f"\n**🤖 AI Analysis:** \n"
if self.ai_analysis.get('explanation'):
md += "\n**🤖 AI Analysis:** \n"
if self.ai_analysis.get("explanation"):
md += f"*{self.ai_analysis['explanation']}* \n"
if self.ai_analysis.get('best_practices'):
if self.ai_analysis.get("best_practices"):
md += f"**Best Practices:** {', '.join(self.ai_analysis['best_practices'])} \n"
if self.ai_analysis.get('tutorial_group'):
if self.ai_analysis.get("tutorial_group"):
md += f"**Tutorial Group:** {self.ai_analysis['tutorial_group']} \n"
md += f"\n```{self.language.lower()}\n"
@@ -117,13 +117,13 @@ class ExampleReport:
"""Summary of test example extraction results"""
total_examples: int
examples_by_category: Dict[str, int]
examples_by_language: Dict[str, int]
examples: List[TestExample]
examples_by_category: dict[str, int]
examples_by_language: dict[str, int]
examples: list[TestExample]
avg_complexity: float
high_value_count: int # confidence > 0.7
file_path: Optional[str] = None # If single file
directory: Optional[str] = None # If directory
file_path: str | None = None # If single file
directory: str | None = None # If directory
def to_dict(self) -> dict:
"""Convert to dictionary for JSON serialization"""
@@ -135,7 +135,7 @@ class ExampleReport:
"high_value_count": self.high_value_count,
"file_path": self.file_path,
"directory": self.directory,
"examples": [ex.to_dict() for ex in self.examples]
"examples": [ex.to_dict() for ex in self.examples],
}
def to_markdown(self) -> str:
@@ -164,19 +164,20 @@ class ExampleReport:
# PYTHON TEST ANALYZER (AST-based)
# ============================================================================
class PythonTestAnalyzer:
"""Deep AST-based test example extraction for Python"""
def __init__(self):
self.trivial_patterns = {
'assertTrue(True)',
'assertFalse(False)',
'assertEqual(1, 1)',
'assertIsNone(None)',
'assertIsNotNone(None)',
"assertTrue(True)",
"assertFalse(False)",
"assertEqual(1, 1)",
"assertIsNone(None)",
"assertIsNotNone(None)",
}
def extract(self, file_path: str, code: str) -> List[TestExample]:
def extract(self, file_path: str, code: str) -> list[TestExample]:
"""Extract examples from Python test file"""
examples = []
@@ -193,20 +194,16 @@ class PythonTestAnalyzer:
for node in ast.walk(tree):
if isinstance(node, ast.ClassDef):
if self._is_test_class(node):
examples.extend(self._extract_from_test_class(
node, file_path, imports
))
examples.extend(self._extract_from_test_class(node, file_path, imports))
# Find test functions (pytest)
elif isinstance(node, ast.FunctionDef):
if self._is_test_function(node):
examples.extend(self._extract_from_test_function(
node, file_path, imports
))
examples.extend(self._extract_from_test_function(node, file_path, imports))
return examples
def _extract_imports(self, tree: ast.AST) -> List[str]:
def _extract_imports(self, tree: ast.AST) -> list[str]:
"""Extract imported modules"""
imports = []
for node in ast.walk(tree):
@@ -221,30 +218,30 @@ class PythonTestAnalyzer:
"""Check if class is a test class"""
# unittest.TestCase pattern
for base in node.bases:
if isinstance(base, ast.Name) and 'Test' in base.id:
return True
elif isinstance(base, ast.Attribute) and base.attr == 'TestCase':
if (
isinstance(base, ast.Name)
and "Test" in base.id
or isinstance(base, ast.Attribute)
and base.attr == "TestCase"
):
return True
return False
def _is_test_function(self, node: ast.FunctionDef) -> bool:
"""Check if function is a test function"""
# pytest pattern: starts with test_
if node.name.startswith('test_'):
if node.name.startswith("test_"):
return True
# Has @pytest.mark decorator
for decorator in node.decorator_list:
if isinstance(decorator, ast.Attribute):
if 'pytest' in ast.unparse(decorator):
if "pytest" in ast.unparse(decorator):
return True
return False
def _extract_from_test_class(
self,
class_node: ast.ClassDef,
file_path: str,
imports: List[str]
) -> List[TestExample]:
self, class_node: ast.ClassDef, file_path: str, imports: list[str]
) -> list[TestExample]:
"""Extract examples from unittest.TestCase class"""
examples = []
@@ -253,63 +250,46 @@ class PythonTestAnalyzer:
# Process each test method
for node in class_node.body:
if isinstance(node, ast.FunctionDef) and node.name.startswith('test_'):
examples.extend(self._analyze_test_body(
node,
file_path,
imports,
setup_code=setup_code
))
if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
examples.extend(self._analyze_test_body(node, file_path, imports, setup_code=setup_code))
return examples
def _extract_from_test_function(
self,
func_node: ast.FunctionDef,
file_path: str,
imports: List[str]
) -> List[TestExample]:
self, func_node: ast.FunctionDef, file_path: str, imports: list[str]
) -> list[TestExample]:
"""Extract examples from pytest test function"""
# Check for fixture parameters
fixture_setup = self._extract_fixtures(func_node)
return self._analyze_test_body(
func_node,
file_path,
imports,
setup_code=fixture_setup
)
return self._analyze_test_body(func_node, file_path, imports, setup_code=fixture_setup)
def _extract_setup_method(self, class_node: ast.ClassDef) -> Optional[str]:
def _extract_setup_method(self, class_node: ast.ClassDef) -> str | None:
"""Extract setUp method code"""
for node in class_node.body:
if isinstance(node, ast.FunctionDef) and node.name == 'setUp':
if isinstance(node, ast.FunctionDef) and node.name == "setUp":
return ast.unparse(node.body)
return None
def _extract_fixtures(self, func_node: ast.FunctionDef) -> Optional[str]:
def _extract_fixtures(self, func_node: ast.FunctionDef) -> str | None:
"""Extract pytest fixture parameters"""
if not func_node.args.args:
return None
# Skip 'self' parameter
params = [arg.arg for arg in func_node.args.args if arg.arg != 'self']
params = [arg.arg for arg in func_node.args.args if arg.arg != "self"]
if params:
return f"# Fixtures: {', '.join(params)}"
return None
def _analyze_test_body(
self,
func_node: ast.FunctionDef,
file_path: str,
imports: List[str],
setup_code: Optional[str] = None
) -> List[TestExample]:
self, func_node: ast.FunctionDef, file_path: str, imports: list[str], setup_code: str | None = None
) -> list[TestExample]:
"""Analyze test function body for extractable patterns"""
examples = []
# Get docstring for description
docstring = ast.get_docstring(func_node) or func_node.name.replace('_', ' ')
docstring = ast.get_docstring(func_node) or func_node.name.replace("_", " ")
# Detect tags
tags = self._detect_tags(func_node, imports)
@@ -321,7 +301,9 @@ class PythonTestAnalyzer:
examples.extend(instantiations)
# 2. Method calls with assertions
method_calls = self._find_method_calls_with_assertions(func_node, file_path, docstring, setup_code, tags, imports)
method_calls = self._find_method_calls_with_assertions(
func_node, file_path, docstring, setup_code, tags, imports
)
examples.extend(method_calls)
# 3. Configuration dictionaries
@@ -334,28 +316,28 @@ class PythonTestAnalyzer:
return examples
def _detect_tags(self, func_node: ast.FunctionDef, imports: List[str]) -> List[str]:
def _detect_tags(self, func_node: ast.FunctionDef, imports: list[str]) -> list[str]:
"""Detect test tags (pytest, mock, async, etc.)"""
tags = []
# Check decorators
for decorator in func_node.decorator_list:
decorator_str = ast.unparse(decorator).lower()
if 'pytest' in decorator_str:
tags.append('pytest')
if 'mock' in decorator_str:
tags.append('mock')
if 'async' in decorator_str or func_node.name.startswith('test_async'):
tags.append('async')
if "pytest" in decorator_str:
tags.append("pytest")
if "mock" in decorator_str:
tags.append("mock")
if "async" in decorator_str or func_node.name.startswith("test_async"):
tags.append("async")
# Check if using unittest
if 'unittest' in imports:
tags.append('unittest')
if "unittest" in imports:
tags.append("unittest")
# Check function body for mock usage
func_str = ast.unparse(func_node).lower()
if 'mock' in func_str or 'patch' in func_str:
tags.append('mock')
if "mock" in func_str or "patch" in func_str:
tags.append("mock")
return list(set(tags))
@@ -364,10 +346,10 @@ class PythonTestAnalyzer:
func_node: ast.FunctionDef,
file_path: str,
description: str,
setup_code: Optional[str],
tags: List[str],
imports: List[str]
) -> List[TestExample]:
setup_code: str | None,
tags: list[str],
imports: list[str],
) -> list[TestExample]:
"""Find object instantiation patterns: obj = ClassName(...)"""
examples = []
@@ -379,7 +361,7 @@ class PythonTestAnalyzer:
code = ast.unparse(node)
# Skip trivial or mock-only
if len(code) < 20 or 'Mock()' in code:
if len(code) < 20 or "Mock()" in code:
continue
# Get class name
@@ -400,7 +382,7 @@ class PythonTestAnalyzer:
complexity_score=self._calculate_complexity(code),
confidence=0.8,
tags=tags,
dependencies=imports
dependencies=imports,
)
examples.append(example)
@@ -411,10 +393,10 @@ class PythonTestAnalyzer:
func_node: ast.FunctionDef,
file_path: str,
description: str,
setup_code: Optional[str],
tags: List[str],
imports: List[str]
) -> List[TestExample]:
setup_code: str | None,
tags: list[str],
imports: list[str],
) -> list[TestExample]:
"""Find method calls followed by assertions"""
examples = []
@@ -450,7 +432,7 @@ class PythonTestAnalyzer:
complexity_score=self._calculate_complexity(code),
confidence=0.85,
tags=tags,
dependencies=imports
dependencies=imports,
)
examples.append(example)
@@ -461,10 +443,10 @@ class PythonTestAnalyzer:
func_node: ast.FunctionDef,
file_path: str,
description: str,
setup_code: Optional[str],
tags: List[str],
imports: List[str]
) -> List[TestExample]:
setup_code: str | None,
tags: list[str],
imports: list[str],
) -> list[TestExample]:
"""Find configuration dictionary patterns"""
examples = []
@@ -491,7 +473,7 @@ class PythonTestAnalyzer:
complexity_score=self._calculate_complexity(code),
confidence=0.75,
tags=tags,
dependencies=imports
dependencies=imports,
)
examples.append(example)
@@ -502,10 +484,10 @@ class PythonTestAnalyzer:
func_node: ast.FunctionDef,
file_path: str,
description: str,
setup_code: Optional[str],
tags: List[str],
imports: List[str]
) -> List[TestExample]:
setup_code: str | None,
tags: list[str],
imports: list[str],
) -> list[TestExample]:
"""Find multi-step workflow patterns (integration tests)"""
examples = []
@@ -515,7 +497,7 @@ class PythonTestAnalyzer:
code = ast.unparse(func_node.body)
# Skip if too long (> 30 lines)
if code.count('\n') > 30:
if code.count("\n") > 30:
return examples
example = TestExample(
@@ -532,8 +514,8 @@ class PythonTestAnalyzer:
line_end=func_node.end_lineno or func_node.lineno,
complexity_score=min(1.0, len(func_node.body) / 10),
confidence=0.9,
tags=tags + ['workflow', 'integration'],
dependencies=imports
tags=tags + ["workflow", "integration"],
dependencies=imports,
)
examples.append(example)
@@ -568,7 +550,7 @@ class PythonTestAnalyzer:
if isinstance(node, ast.Expr) and isinstance(node.value, ast.Call):
call_str = ast.unparse(node.value).lower()
assertion_methods = ['assert', 'expect', 'should']
assertion_methods = ["assert", "expect", "should"]
return any(method in call_str for method in assertion_methods)
return False
@@ -584,7 +566,7 @@ class PythonTestAnalyzer:
def _is_integration_test(self, func_node: ast.FunctionDef) -> bool:
"""Check if test looks like an integration test"""
test_name = func_node.name.lower()
integration_keywords = ['workflow', 'integration', 'end_to_end', 'e2e', 'full']
integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"]
return any(keyword in test_name for keyword in integration_keywords)
def _extract_assertion_after(self, func_node: ast.FunctionDef, target_node: ast.AST) -> str:
@@ -608,8 +590,8 @@ class PythonTestAnalyzer:
def _calculate_complexity(self, code: str) -> float:
"""Calculate code complexity score (0-1)"""
# Simple heuristic: more lines + more parameters = more complex
lines = code.count('\n') + 1
params = code.count(',') + 1
lines = code.count("\n") + 1
params = code.count(",") + 1
complexity = min(1.0, (lines * 0.1) + (params * 0.05))
return round(complexity, 2)
@@ -623,57 +605,58 @@ class PythonTestAnalyzer:
# GENERIC TEST ANALYZER (Regex-based for non-Python languages)
# ============================================================================
class GenericTestAnalyzer:
"""Regex-based test example extraction for non-Python languages"""
# Language-specific regex patterns
PATTERNS = {
"javascript": {
"instantiation": r'(?:const|let|var)\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)',
"assertion": r'expect\(([^)]+)\)\.to(?:Equal|Be|Match)\(([^)]+)\)',
"instantiation": r"(?:const|let|var)\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
"assertion": r"expect\(([^)]+)\)\.to(?:Equal|Be|Match)\(([^)]+)\)",
"test_function": r'(?:test|it)\(["\']([^"\']+)["\']',
"config": r'(?:const|let)\s+config\s*=\s*\{[\s\S]{20,500}?\}',
"config": r"(?:const|let)\s+config\s*=\s*\{[\s\S]{20,500}?\}",
},
"typescript": {
"instantiation": r'(?:const|let|var)\s+(\w+):\s*\w+\s*=\s*new\s+(\w+)\(([^)]*)\)',
"assertion": r'expect\(([^)]+)\)\.to(?:Equal|Be|Match)\(([^)]+)\)',
"instantiation": r"(?:const|let|var)\s+(\w+):\s*\w+\s*=\s*new\s+(\w+)\(([^)]*)\)",
"assertion": r"expect\(([^)]+)\)\.to(?:Equal|Be|Match)\(([^)]+)\)",
"test_function": r'(?:test|it)\(["\']([^"\']+)["\']',
"config": r'(?:const|let)\s+config:\s*\w+\s*=\s*\{[\s\S]{20,500}?\}',
"config": r"(?:const|let)\s+config:\s*\w+\s*=\s*\{[\s\S]{20,500}?\}",
},
"go": {
"instantiation": r'(\w+)\s*:=\s*(\w+)\{([^}]+)\}',
"instantiation": r"(\w+)\s*:=\s*(\w+)\{([^}]+)\}",
"assertion": r't\.(?:Error|Fatal)(?:f)?\(["\']([^"\']+)["\']',
"test_function": r'func\s+(Test\w+)\(t\s+\*testing\.T\)',
"table_test": r'tests\s*:=\s*\[\]struct\s*\{[\s\S]{50,1000}?\}',
"test_function": r"func\s+(Test\w+)\(t\s+\*testing\.T\)",
"table_test": r"tests\s*:=\s*\[\]struct\s*\{[\s\S]{50,1000}?\}",
},
"rust": {
"instantiation": r'let\s+(\w+)\s*=\s*(\w+)::new\(([^)]*)\)',
"assertion": r'assert(?:_eq)?!\(([^)]+)\)',
"test_function": r'#\[test\]\s*fn\s+(\w+)\(\)',
"instantiation": r"let\s+(\w+)\s*=\s*(\w+)::new\(([^)]*)\)",
"assertion": r"assert(?:_eq)?!\(([^)]+)\)",
"test_function": r"#\[test\]\s*fn\s+(\w+)\(\)",
},
"java": {
"instantiation": r'(\w+)\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)',
"assertion": r'assert(?:Equals|True|False|NotNull)\(([^)]+)\)',
"test_function": r'@Test\s+public\s+void\s+(\w+)\(\)',
"instantiation": r"(\w+)\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
"assertion": r"assert(?:Equals|True|False|NotNull)\(([^)]+)\)",
"test_function": r"@Test\s+public\s+void\s+(\w+)\(\)",
},
"csharp": {
"instantiation": r'var\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)',
"assertion": r'Assert\.(?:AreEqual|IsTrue|IsFalse|IsNotNull)\(([^)]+)\)',
"test_function": r'\[Test\]\s+public\s+void\s+(\w+)\(\)',
"instantiation": r"var\s+(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
"assertion": r"Assert\.(?:AreEqual|IsTrue|IsFalse|IsNotNull)\(([^)]+)\)",
"test_function": r"\[Test\]\s+public\s+void\s+(\w+)\(\)",
},
"php": {
"instantiation": r'\$(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)',
"assertion": r'\$this->assert(?:Equals|True|False|NotNull)\(([^)]+)\)',
"test_function": r'public\s+function\s+(test\w+)\(\)',
"instantiation": r"\$(\w+)\s*=\s*new\s+(\w+)\(([^)]*)\)",
"assertion": r"\$this->assert(?:Equals|True|False|NotNull)\(([^)]+)\)",
"test_function": r"public\s+function\s+(test\w+)\(\)",
},
"ruby": {
"instantiation": r'(\w+)\s*=\s*(\w+)\.new\(([^)]*)\)',
"assertion": r'expect\(([^)]+)\)\.to\s+(?:eq|be|match)\(([^)]+)\)',
"instantiation": r"(\w+)\s*=\s*(\w+)\.new\(([^)]*)\)",
"assertion": r"expect\(([^)]+)\)\.to\s+(?:eq|be|match)\(([^)]+)\)",
"test_function": r'(?:test|it)\s+["\']([^"\']+)["\']',
}
},
}
def extract(self, file_path: str, code: str, language: str) -> List[TestExample]:
def extract(self, file_path: str, code: str, language: str) -> list[TestExample]:
"""Extract examples from test file using regex patterns"""
examples = []
@@ -704,7 +687,7 @@ class GenericTestAnalyzer:
code=inst_match.group(0),
language=language,
file_path=file_path,
line_number=code[:start_pos + inst_match.start()].count('\n') + 1
line_number=code[: start_pos + inst_match.start()].count("\n") + 1,
)
examples.append(example)
@@ -717,20 +700,14 @@ class GenericTestAnalyzer:
code=config_match.group(0),
language=language,
file_path=file_path,
line_number=code[:start_pos + config_match.start()].count('\n') + 1
line_number=code[: start_pos + config_match.start()].count("\n") + 1,
)
examples.append(example)
return examples
def _create_example(
self,
test_name: str,
category: str,
code: str,
language: str,
file_path: str,
line_number: int
self, test_name: str, category: str, code: str, language: str, file_path: str, line_number: int
) -> TestExample:
"""Create TestExample from regex match"""
return TestExample(
@@ -743,11 +720,11 @@ class GenericTestAnalyzer:
expected_behavior="",
file_path=file_path,
line_start=line_number,
line_end=line_number + code.count('\n'),
complexity_score=min(1.0, (code.count('\n') + 1) * 0.1),
line_end=line_number + code.count("\n"),
complexity_score=min(1.0, (code.count("\n") + 1) * 0.1),
confidence=0.6, # Lower confidence for regex extraction
tags=[],
dependencies=[]
dependencies=[],
)
@@ -755,6 +732,7 @@ class GenericTestAnalyzer:
# EXAMPLE QUALITY FILTER
# ============================================================================
class ExampleQualityFilter:
"""Filter out trivial or low-quality examples"""
@@ -764,16 +742,16 @@ class ExampleQualityFilter:
# Trivial patterns to exclude
self.trivial_patterns = [
'Mock()',
'MagicMock()',
'assertTrue(True)',
'assertFalse(False)',
'assertEqual(1, 1)',
'pass',
'...',
"Mock()",
"MagicMock()",
"assertTrue(True)",
"assertFalse(False)",
"assertEqual(1, 1)",
"pass",
"...",
]
def filter(self, examples: List[TestExample]) -> List[TestExample]:
def filter(self, examples: list[TestExample]) -> list[TestExample]:
"""Filter examples by quality criteria"""
filtered = []
@@ -803,42 +781,43 @@ class ExampleQualityFilter:
# TEST EXAMPLE EXTRACTOR (Main Orchestrator)
# ============================================================================
class TestExampleExtractor:
"""Main orchestrator for test example extraction"""
# Test file patterns
TEST_PATTERNS = [
'test_*.py',
'*_test.py',
'test*.js',
'*test.js',
'*_test.go',
'*_test.rs',
'Test*.java',
'Test*.cs',
'*Test.php',
'*_spec.rb',
"test_*.py",
"*_test.py",
"test*.js",
"*test.js",
"*_test.go",
"*_test.rs",
"Test*.java",
"Test*.cs",
"*Test.php",
"*_spec.rb",
]
# Language detection by extension
LANGUAGE_MAP = {
'.py': 'Python',
'.js': 'JavaScript',
'.ts': 'TypeScript',
'.go': 'Go',
'.rs': 'Rust',
'.java': 'Java',
'.cs': 'C#',
'.php': 'PHP',
'.rb': 'Ruby',
".py": "Python",
".js": "JavaScript",
".ts": "TypeScript",
".go": "Go",
".rs": "Rust",
".java": "Java",
".cs": "C#",
".php": "PHP",
".rb": "Ruby",
}
def __init__(
self,
min_confidence: float = 0.7,
max_per_file: int = 10,
languages: Optional[List[str]] = None,
enhance_with_ai: bool = True
languages: list[str] | None = None,
enhance_with_ai: bool = True,
):
self.python_analyzer = PythonTestAnalyzer()
self.generic_analyzer = GenericTestAnalyzer()
@@ -852,16 +831,13 @@ class TestExampleExtractor:
if self.enhance_with_ai:
try:
from skill_seekers.cli.ai_enhancer import TestExampleEnhancer
self.ai_enhancer = TestExampleEnhancer()
except Exception as e:
logger.warning(f"⚠️ Failed to initialize AI enhancer: {e}")
self.enhance_with_ai = False
def extract_from_directory(
self,
directory: Path,
recursive: bool = True
) -> ExampleReport:
def extract_from_directory(self, directory: Path, recursive: bool = True) -> ExampleReport:
"""Extract examples from all test files in directory"""
directory = Path(directory)
@@ -882,7 +858,7 @@ class TestExampleExtractor:
# Generate report
return self._create_report(all_examples, directory=str(directory))
def extract_from_file(self, file_path: Path) -> List[TestExample]:
def extract_from_file(self, file_path: Path) -> list[TestExample]:
"""Extract examples from single test file"""
file_path = Path(file_path)
@@ -898,13 +874,13 @@ class TestExampleExtractor:
# Read file
try:
code = file_path.read_text(encoding='utf-8')
code = file_path.read_text(encoding="utf-8")
except UnicodeDecodeError:
logger.warning(f"Failed to read {file_path} (encoding error)")
return []
# Extract examples based on language
if language == 'Python':
if language == "Python":
examples = self.python_analyzer.extract(str(file_path), code)
else:
examples = self.generic_analyzer.extract(str(file_path), code, language)
@@ -915,17 +891,13 @@ class TestExampleExtractor:
# Limit per file
if len(filtered_examples) > self.max_per_file:
# Sort by confidence and take top N
filtered_examples = sorted(
filtered_examples,
key=lambda x: x.confidence,
reverse=True
)[:self.max_per_file]
filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[: self.max_per_file]
logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}")
return filtered_examples
def _find_test_files(self, directory: Path, recursive: bool) -> List[Path]:
def _find_test_files(self, directory: Path, recursive: bool) -> list[Path]:
"""Find test files in directory"""
test_files = []
@@ -940,13 +912,10 @@ class TestExampleExtractor:
def _detect_language(self, file_path: Path) -> str:
"""Detect programming language from file extension"""
suffix = file_path.suffix.lower()
return self.LANGUAGE_MAP.get(suffix, 'Unknown')
return self.LANGUAGE_MAP.get(suffix, "Unknown")
def _create_report(
self,
examples: List[TestExample],
file_path: Optional[str] = None,
directory: Optional[str] = None
self, examples: list[TestExample], file_path: str | None = None, directory: str | None = None
) -> ExampleReport:
"""Create summary report from examples"""
# Enhance examples with AI analysis (C3.6)
@@ -957,20 +926,18 @@ class TestExampleExtractor:
# Update examples with AI analysis
for i, example in enumerate(examples):
if i < len(enhanced_dicts) and 'ai_analysis' in enhanced_dicts[i]:
example.ai_analysis = enhanced_dicts[i]['ai_analysis']
if i < len(enhanced_dicts) and "ai_analysis" in enhanced_dicts[i]:
example.ai_analysis = enhanced_dicts[i]["ai_analysis"]
# Count by category
examples_by_category = {}
for example in examples:
examples_by_category[example.category] = \
examples_by_category.get(example.category, 0) + 1
examples_by_category[example.category] = examples_by_category.get(example.category, 0) + 1
# Count by language
examples_by_language = {}
for example in examples:
examples_by_language[example.language] = \
examples_by_language.get(example.language, 0) + 1
examples_by_language[example.language] = examples_by_language.get(example.language, 0) + 1
# Calculate averages
avg_complexity = sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
@@ -984,7 +951,7 @@ class TestExampleExtractor:
avg_complexity=round(avg_complexity, 2),
high_value_count=high_value_count,
file_path=file_path,
directory=directory
directory=directory,
)
@@ -992,10 +959,11 @@ class TestExampleExtractor:
# COMMAND-LINE INTERFACE
# ============================================================================
def main():
"""Main entry point for CLI"""
parser = argparse.ArgumentParser(
description='Extract usage examples from test files',
description="Extract usage examples from test files",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
@@ -1010,49 +978,20 @@ Examples:
# Filter by confidence
%(prog)s tests/ --min-confidence 0.7
"""
""",
)
parser.add_argument("directory", nargs="?", help="Directory containing test files")
parser.add_argument("--file", help="Single test file to analyze")
parser.add_argument("--language", help="Filter by programming language (python, javascript, etc.)")
parser.add_argument(
'directory',
nargs='?',
help='Directory containing test files'
"--min-confidence", type=float, default=0.5, help="Minimum confidence threshold (0.0-1.0, default: 0.5)"
)
parser.add_argument("--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)")
parser.add_argument("--json", action="store_true", help="Output JSON format")
parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
parser.add_argument(
'--file',
help='Single test file to analyze'
)
parser.add_argument(
'--language',
help='Filter by programming language (python, javascript, etc.)'
)
parser.add_argument(
'--min-confidence',
type=float,
default=0.5,
help='Minimum confidence threshold (0.0-1.0, default: 0.5)'
)
parser.add_argument(
'--max-per-file',
type=int,
default=10,
help='Maximum examples per file (default: 10)'
)
parser.add_argument(
'--json',
action='store_true',
help='Output JSON format'
)
parser.add_argument(
'--markdown',
action='store_true',
help='Output Markdown format'
)
parser.add_argument(
'--recursive',
action='store_true',
default=True,
help='Search directory recursively (default: True)'
"--recursive", action="store_true", default=True, help="Search directory recursively (default: True)"
)
args = parser.parse_args()
@@ -1064,9 +1003,7 @@ Examples:
# Create extractor
languages = [args.language] if args.language else None
extractor = TestExampleExtractor(
min_confidence=args.min_confidence,
max_per_file=args.max_per_file,
languages=languages
min_confidence=args.min_confidence, max_per_file=args.max_per_file, languages=languages
)
# Extract examples
@@ -1074,10 +1011,7 @@ Examples:
examples = extractor.extract_from_file(Path(args.file))
report = extractor._create_report(examples, file_path=args.file)
else:
report = extractor.extract_from_directory(
Path(args.directory),
recursive=args.recursive
)
report = extractor.extract_from_directory(Path(args.directory), recursive=args.recursive)
# Output results
if args.json:
@@ -1086,19 +1020,19 @@ Examples:
print(report.to_markdown())
else:
# Human-readable summary
print(f"\nTest Example Extraction Results")
print(f"=" * 50)
print("\nTest Example Extraction Results")
print("=" * 50)
print(f"Total Examples: {report.total_examples}")
print(f"High Value (confidence > 0.7): {report.high_value_count}")
print(f"Average Complexity: {report.avg_complexity:.2f}")
print(f"\nExamples by Category:")
print("\nExamples by Category:")
for category, count in sorted(report.examples_by_category.items()):
print(f" {category}: {count}")
print(f"\nExamples by Language:")
print("\nExamples by Language:")
for language, count in sorted(report.examples_by_language.items()):
print(f" {language}: {count}")
print(f"\nUse --json or --markdown for detailed output")
print("\nUse --json or --markdown for detailed output")
if __name__ == '__main__':
if __name__ == "__main__":
main()