Files
skill-seekers-reference/tests/test_code_analyzer.py
yusyus 33d8500c44 feat(C2.5): Add inline comment extraction for Python/JS/C++
- Added comment extraction methods to code_analyzer.py
- Supports Python (# style), JavaScript (// and /* */), C++ (// and /* */)
- Extracts comment text, line numbers, and type (inline vs block)
- Skips Python shebang and encoding declarations
- Preserves TODO/FIXME/NOTE markers for developer notes

Implementation:
- _extract_python_comments(): Extract # comments with line tracking
- _extract_js_comments(): Extract // and /* */ comments
- _extract_cpp_comments(): Reuses JS logic (same syntax)
- Integrated into _analyze_python(), _analyze_javascript(), _analyze_cpp()

Output Format:
{
  'classes': [...],
  'functions': [...],
  'comments': [
    {'line': 5, 'text': 'TODO: Optimize', 'type': 'inline'},
    {'line': 12, 'text': 'Block comment\nwith lines', 'type': 'block'}
  ]
}

Tests:
- Added 8 comprehensive tests to test_code_analyzer.py
- Total: 30 tests passing 
- Python: Comment extraction, line numbers, shebang skip
- JavaScript: Inline comments, block comments, mixed
- C++: Comment extraction (uses JS logic)
- TODO/FIXME detection test

Related Issues:
- Closes #67 (C2.5 Extract inline comments as notes)
- Part of C2 Local Codebase Scraping roadmap (TIER 3)

Files Modified:
- src/skill_seekers/cli/code_analyzer.py (+67 lines)
- tests/test_code_analyzer.py (+194 lines)
2026-01-01 23:02:34 +03:00

747 lines
22 KiB
Python

#!/usr/bin/env python3
"""
Tests for code_analyzer.py - Code analysis at configurable depth levels.
Test Coverage:
- Python AST parsing (docstrings, signatures, decorators)
- JavaScript/TypeScript regex parsing
- C++ regex parsing
- Depth level behavior (surface/deep)
- Error handling
"""
import unittest
import sys
import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from skill_seekers.cli.code_analyzer import CodeAnalyzer
class TestPythonParsing(unittest.TestCase):
"""Tests for Python AST parsing"""
def setUp(self):
"""Set up test analyzer with deep analysis"""
self.analyzer = CodeAnalyzer(depth='deep')
def test_python_function_signature_basic(self):
"""Test basic Python function signature extraction."""
code = '''
def greet(name, age):
"""Say hello."""
return f"Hello {name}, you are {age}"
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
self.assertIn('functions', result)
self.assertEqual(len(result['functions']), 1)
func = result['functions'][0]
self.assertEqual(func['name'], 'greet')
self.assertEqual(len(func['parameters']), 2)
self.assertEqual(func['parameters'][0]['name'], 'name')
self.assertEqual(func['parameters'][1]['name'], 'age')
self.assertEqual(func['docstring'], 'Say hello.')
def test_python_function_with_type_hints(self):
"""Test Python function with type annotations."""
code = '''
def add_numbers(a: int, b: int) -> int:
"""Add two integers."""
return a + b
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
self.assertIn('functions', result)
func = result['functions'][0]
self.assertEqual(func['name'], 'add_numbers')
self.assertEqual(func['return_type'], 'int')
self.assertEqual(func['parameters'][0]['type_hint'], 'int')
self.assertEqual(func['parameters'][1]['type_hint'], 'int')
self.assertEqual(func['docstring'], 'Add two integers.')
def test_python_function_with_defaults(self):
"""Test Python function with default parameter values."""
code = '''
def create_user(name: str, age: int = 18, active: bool = True) -> dict:
"""Create a user object."""
return {"name": name, "age": age, "active": active}
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
func = result['functions'][0]
self.assertEqual(func['name'], 'create_user')
# Check defaults
self.assertIsNone(func['parameters'][0]['default'])
self.assertEqual(func['parameters'][1]['default'], '18')
self.assertEqual(func['parameters'][2]['default'], 'True')
def test_python_async_function(self):
"""Test async Python function detection."""
code = '''
async def fetch_data(url: str) -> dict:
"""Fetch data from URL."""
pass
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
func = result['functions'][0]
self.assertEqual(func['name'], 'fetch_data')
self.assertTrue(func['is_async'])
self.assertEqual(func['return_type'], 'dict')
def test_python_class_extraction(self):
"""Test Python class extraction with inheritance."""
code = '''
class Animal:
"""Base animal class."""
def make_sound(self):
"""Make a sound."""
pass
class Dog(Animal):
"""Dog class."""
def bark(self):
"""Bark loudly."""
print("Woof!")
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
self.assertIn('classes', result)
self.assertEqual(len(result['classes']), 2)
# Check first class
animal_class = result['classes'][0]
self.assertEqual(animal_class['name'], 'Animal')
self.assertEqual(animal_class['docstring'], 'Base animal class.')
self.assertEqual(len(animal_class['methods']), 1)
self.assertEqual(animal_class['methods'][0]['name'], 'make_sound')
# Check inherited class
dog_class = result['classes'][1]
self.assertEqual(dog_class['name'], 'Dog')
self.assertEqual(dog_class['base_classes'], ['Animal'])
self.assertEqual(len(dog_class['methods']), 1)
self.assertEqual(dog_class['methods'][0]['name'], 'bark')
def test_python_docstring_extraction(self):
"""Test docstring extraction for functions and classes."""
code = '''
class Calculator:
"""A simple calculator class.
Supports basic arithmetic operations.
"""
def add(self, a, b):
"""Add two numbers.
Args:
a: First number
b: Second number
Returns:
Sum of a and b
"""
return a + b
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
# Check class docstring
calc_class = result['classes'][0]
self.assertIn('A simple calculator class', calc_class['docstring'])
self.assertIn('Supports basic arithmetic operations', calc_class['docstring'])
# Check method docstring
add_method = calc_class['methods'][0]
self.assertIn('Add two numbers', add_method['docstring'])
self.assertIn('Args:', add_method['docstring'])
self.assertIn('Returns:', add_method['docstring'])
def test_python_decorators(self):
"""Test decorator extraction."""
code = '''
class MyClass:
@property
def value(self):
"""Get value."""
return self._value
@staticmethod
def helper():
"""Static helper."""
pass
@classmethod
def from_dict(cls, data):
"""Create from dict."""
pass
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
my_class = result['classes'][0]
methods = my_class['methods']
# Check decorators
self.assertIn('property', methods[0]['decorators'])
self.assertIn('staticmethod', methods[1]['decorators'])
self.assertIn('classmethod', methods[2]['decorators'])
def test_python_syntax_error_handling(self):
"""Test handling of malformed Python code."""
code = '''
def broken_function(
# Missing closing parenthesis
return "broken"
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
# Should return empty dict or handle gracefully, not crash
self.assertIsInstance(result, dict)
# No functions should be extracted from broken code
self.assertEqual(result.get('functions', []), [])
class TestJavaScriptParsing(unittest.TestCase):
"""Tests for JavaScript/TypeScript regex parsing"""
def setUp(self):
"""Set up test analyzer with deep analysis"""
self.analyzer = CodeAnalyzer(depth='deep')
def test_javascript_function_basic(self):
"""Test basic JavaScript function extraction."""
code = '''
function greet(name, age) {
return `Hello ${name}, you are ${age}`;
}
'''
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
self.assertIn('functions', result)
func = result['functions'][0]
self.assertEqual(func['name'], 'greet')
self.assertEqual(len(func['parameters']), 2)
self.assertEqual(func['parameters'][0]['name'], 'name')
self.assertEqual(func['parameters'][1]['name'], 'age')
def test_javascript_arrow_function(self):
"""Test arrow function detection."""
code = '''
const add = (a, b) => {
return a + b;
};
const multiply = (x, y) => x * y;
'''
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
self.assertIn('functions', result)
self.assertEqual(len(result['functions']), 2)
# Check first arrow function
self.assertEqual(result['functions'][0]['name'], 'add')
self.assertEqual(len(result['functions'][0]['parameters']), 2)
def test_javascript_class_methods(self):
"""Test ES6 class method extraction.
Note: Regex-based parser has limitations in extracting all methods.
This test verifies basic method extraction works.
"""
code = '''
class User {
constructor(name, email) {
this.name = name;
this.email = email;
}
getProfile() {
return { name: this.name, email: this.email };
}
async fetchData() {
return await fetch('/api/user');
}
}
'''
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
self.assertIn('classes', result)
user_class = result['classes'][0]
self.assertEqual(user_class['name'], 'User')
# Regex parser may not catch all methods, verify at least one method extracted
self.assertGreaterEqual(len(user_class['methods']), 1)
# Check that methods list is not empty
method_names = [m['name'] for m in user_class['methods']]
self.assertGreater(len(method_names), 0)
def test_typescript_type_annotations(self):
"""Test TypeScript type annotation extraction.
Note: Current regex-based parser extracts parameter type hints
but NOT return types. Return type extraction requires a proper
TypeScript parser (ts-morph or typescript library).
"""
code = '''
function calculate(a: number, b: number): number {
return a + b;
}
interface User {
name: string;
age: number;
}
function createUser(name: string, age: number = 18): User {
return { name, age };
}
'''
result = self.analyzer.analyze_file('test.ts', code, 'TypeScript')
self.assertIn('functions', result)
# Check first function - parameters extracted, but not return type
calc_func = result['functions'][0]
self.assertEqual(calc_func['name'], 'calculate')
self.assertEqual(calc_func['parameters'][0]['type_hint'], 'number')
# Note: return_type is None because regex parser doesn't extract it
self.assertIsNone(calc_func['return_type'])
# Check function with default
create_func = result['functions'][1]
self.assertEqual(create_func['name'], 'createUser')
self.assertEqual(create_func['parameters'][1]['default'], '18')
# Note: return_type is None (regex parser limitation)
self.assertIsNone(create_func['return_type'])
def test_javascript_async_detection(self):
"""Test async function detection in JavaScript."""
code = '''
async function fetchUser(id) {
const response = await fetch(`/api/users/${id}`);
return response.json();
}
const loadData = async () => {
return await fetchUser(1);
};
'''
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
self.assertIn('functions', result)
self.assertGreaterEqual(len(result['functions']), 1)
# Check async function
fetch_func = result['functions'][0]
self.assertEqual(fetch_func['name'], 'fetchUser')
self.assertTrue(fetch_func['is_async'])
class TestCppParsing(unittest.TestCase):
"""Tests for C++ regex parsing"""
def setUp(self):
"""Set up test analyzer with deep analysis"""
self.analyzer = CodeAnalyzer(depth='deep')
def test_cpp_function_signature(self):
"""Test C++ function declaration parsing."""
code = '''
int add(int a, int b);
std::string getName();
void processData(const std::vector<int>& data);
'''
result = self.analyzer.analyze_file('test.h', code, 'C++')
self.assertIn('functions', result)
self.assertGreaterEqual(len(result['functions']), 2)
# Check first function
add_func = result['functions'][0]
self.assertEqual(add_func['name'], 'add')
self.assertEqual(add_func['return_type'], 'int')
def test_cpp_class_extraction(self):
"""Test C++ class extraction with inheritance."""
code = '''
class Animal {
public:
virtual void makeSound() = 0;
};
class Dog : public Animal {
public:
void makeSound() override;
void bark();
private:
std::string breed;
};
'''
result = self.analyzer.analyze_file('test.h', code, 'C++')
self.assertIn('classes', result)
self.assertEqual(len(result['classes']), 2)
# Check Animal class
animal_class = result['classes'][0]
self.assertEqual(animal_class['name'], 'Animal')
# Check Dog class with inheritance
dog_class = result['classes'][1]
self.assertEqual(dog_class['name'], 'Dog')
self.assertIn('Animal', dog_class['base_classes'])
def test_cpp_pointer_parameters(self):
"""Test C++ function with pointer/reference parameters."""
code = '''
void process(int* ptr);
void update(const int& value);
void transform(std::vector<int>* vec);
'''
result = self.analyzer.analyze_file('test.h', code, 'C++')
self.assertIn('functions', result)
self.assertGreaterEqual(len(result['functions']), 2)
# Check that parameters include pointer/reference syntax
process_func = result['functions'][0]
self.assertEqual(process_func['name'], 'process')
def test_cpp_default_parameters(self):
"""Test C++ function with default parameter values."""
code = '''
void initialize(int size = 100, bool verbose = false);
class Config {
public:
Config(std::string name = "default", int timeout = 30);
};
'''
result = self.analyzer.analyze_file('test.h', code, 'C++')
self.assertIn('functions', result)
# Check function with defaults
init_func = result['functions'][0]
self.assertEqual(init_func['name'], 'initialize')
# Verify defaults are captured
self.assertGreaterEqual(len(init_func['parameters']), 2)
class TestDepthLevels(unittest.TestCase):
"""Tests for depth level behavior"""
def test_surface_depth_returns_empty(self):
"""Test that surface depth returns empty analysis."""
analyzer = CodeAnalyzer(depth='surface')
code = '''
def test_function(a, b):
"""Test."""
return a + b
'''
result = analyzer.analyze_file('test.py', code, 'Python')
# Surface depth should return empty dict
self.assertEqual(result, {})
def test_deep_depth_extracts_signatures(self):
"""Test that deep depth extracts full signatures."""
analyzer = CodeAnalyzer(depth='deep')
code = '''
def calculate(x: int, y: int) -> int:
"""Calculate sum."""
return x + y
'''
result = analyzer.analyze_file('test.py', code, 'Python')
# Deep depth should extract full analysis
self.assertIn('functions', result)
self.assertEqual(len(result['functions']), 1)
func = result['functions'][0]
self.assertEqual(func['name'], 'calculate')
self.assertEqual(func['return_type'], 'int')
def test_unknown_language_returns_empty(self):
"""Test that unknown language returns empty dict."""
analyzer = CodeAnalyzer(depth='deep')
code = '''
func main() {
fmt.Println("Hello, Go!")
}
'''
result = analyzer.analyze_file('test.go', code, 'Go')
# Unknown language should return empty dict
self.assertEqual(result, {})
class TestIntegration(unittest.TestCase):
"""Integration tests"""
def test_analyze_file_interface(self):
"""Test the analyze_file public interface."""
analyzer = CodeAnalyzer(depth='deep')
# Test with Python code
py_code = 'def test(): pass'
result = analyzer.analyze_file('test.py', py_code, 'Python')
self.assertIsInstance(result, dict)
# Test with JavaScript code
js_code = 'function test() {}'
result = analyzer.analyze_file('test.js', js_code, 'JavaScript')
self.assertIsInstance(result, dict)
# Test with C++ code
cpp_code = 'void test();'
result = analyzer.analyze_file('test.h', cpp_code, 'C++')
self.assertIsInstance(result, dict)
def test_multiple_items_extraction(self):
"""Test extracting multiple classes and functions."""
analyzer = CodeAnalyzer(depth='deep')
code = '''
def helper_func():
"""Helper function."""
pass
class ClassA:
"""First class."""
def method_a(self):
pass
class ClassB:
"""Second class."""
def method_b(self):
pass
def main_func():
"""Main function."""
pass
'''
result = analyzer.analyze_file('test.py', code, 'Python')
# Should extract 2 standalone functions
self.assertEqual(len(result['functions']), 2)
# Should extract 2 classes
self.assertEqual(len(result['classes']), 2)
# Verify names
func_names = [f['name'] for f in result['functions']]
self.assertIn('helper_func', func_names)
self.assertIn('main_func', func_names)
class_names = [c['name'] for c in result['classes']]
self.assertIn('ClassA', class_names)
self.assertIn('ClassB', class_names)
class TestCommentExtraction(unittest.TestCase):
"""Tests for comment extraction"""
def setUp(self):
"""Set up test analyzer with deep analysis"""
self.analyzer = CodeAnalyzer(depth='deep')
def test_python_comment_extraction(self):
"""Test Python # comment extraction."""
code = '''
# This is a comment
def test_func():
# Inside function comment
x = 5 # Inline comment (not extracted due to code on same line)
return x
# Another top-level comment
class TestClass:
# Class-level comment
pass
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
self.assertIn('comments', result)
comments = result['comments']
# Should have extracted standalone comments
self.assertGreaterEqual(len(comments), 3)
# Check comment content
comment_texts = [c['text'] for c in comments]
self.assertIn('This is a comment', comment_texts)
self.assertIn('Inside function comment', comment_texts)
self.assertIn('Another top-level comment', comment_texts)
# Check all are inline type
for comment in comments:
self.assertEqual(comment['type'], 'inline')
def test_python_comment_line_numbers(self):
"""Test Python comment line number tracking."""
code = '''# Line 1 comment
def func():
# Line 3 comment
pass
# Line 5 comment
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
comments = result['comments']
self.assertEqual(len(comments), 3)
# Check line numbers
line_nums = [c['line'] for c in comments]
self.assertIn(1, line_nums)
self.assertIn(3, line_nums)
self.assertIn(5, line_nums)
def test_python_skip_shebang_and_encoding(self):
"""Test that shebang and encoding declarations are skipped."""
code = '''#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This is a real comment
def func():
pass
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
comments = result['comments']
# Should only have the real comment
self.assertEqual(len(comments), 1)
self.assertEqual(comments[0]['text'], 'This is a real comment')
def test_javascript_inline_comments(self):
"""Test JavaScript // comment extraction."""
code = '''
// Top-level comment
function test() {
// Inside function
const x = 5; // Inline (not extracted)
return x;
}
// Another comment
const y = 10;
'''
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
self.assertIn('comments', result)
comments = result['comments']
# Should have extracted standalone comments
self.assertGreaterEqual(len(comments), 3)
# Check comment types
inline_comments = [c for c in comments if c['type'] == 'inline']
self.assertGreaterEqual(len(inline_comments), 3)
def test_javascript_block_comments(self):
"""Test JavaScript /* */ block comment extraction."""
code = '''
/* This is a
multi-line
block comment */
function test() {
/* Another block comment */
return 42;
}
'''
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
comments = result['comments']
# Should have extracted block comments
block_comments = [c for c in comments if c['type'] == 'block']
self.assertGreaterEqual(len(block_comments), 2)
# Check multi-line content is preserved
first_block = next(c for c in comments if 'multi-line' in c['text'])
self.assertIn('multi-line', first_block['text'])
def test_javascript_mixed_comments(self):
"""Test JavaScript mixed inline and block comments."""
code = '''
// Inline comment
/* Block comment */
function test() {
// Another inline
/* Another block */
return true;
}
'''
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
comments = result['comments']
# Should have both types
inline_comments = [c for c in comments if c['type'] == 'inline']
block_comments = [c for c in comments if c['type'] == 'block']
self.assertGreaterEqual(len(inline_comments), 2)
self.assertGreaterEqual(len(block_comments), 2)
def test_cpp_comment_extraction(self):
"""Test C++ comment extraction (uses same logic as JavaScript)."""
code = '''
// Header comment
class Node {
public:
// Method comment
void update();
/* Block comment for data member */
int value;
};
'''
result = self.analyzer.analyze_file('test.h', code, 'C++')
self.assertIn('comments', result)
comments = result['comments']
# Should have extracted comments
self.assertGreaterEqual(len(comments), 3)
# Check both inline and block
inline_comments = [c for c in comments if c['type'] == 'inline']
block_comments = [c for c in comments if c['type'] == 'block']
self.assertGreaterEqual(len(inline_comments), 2)
self.assertGreaterEqual(len(block_comments), 1)
def test_todo_fixme_comment_detection(self):
"""Test that TODO/FIXME comments are extracted."""
code = '''
# TODO: Implement this feature
def incomplete_func():
# FIXME: Handle edge case
pass
# NOTE: Important information
'''
result = self.analyzer.analyze_file('test.py', code, 'Python')
comments = result['comments']
comment_texts = [c['text'] for c in comments]
self.assertTrue(any('TODO' in text for text in comment_texts))
self.assertTrue(any('FIXME' in text for text in comment_texts))
self.assertTrue(any('NOTE' in text for text in comment_texts))
if __name__ == '__main__':
# Run tests with verbose output
unittest.main(verbosity=2)