feat(C2.5): Add inline comment extraction for Python/JS/C++
- Added comment extraction methods to code_analyzer.py
- Supports Python (# style), JavaScript (// and /* */), C++ (// and /* */)
- Extracts comment text, line numbers, and type (inline vs block)
- Skips Python shebang and encoding declarations
- Preserves TODO/FIXME/NOTE markers for developer notes
Implementation:
- _extract_python_comments(): Extract # comments with line tracking
- _extract_js_comments(): Extract // and /* */ comments
- _extract_cpp_comments(): Reuses JS logic (same syntax)
- Integrated into _analyze_python(), _analyze_javascript(), _analyze_cpp()
Output Format:
{
'classes': [...],
'functions': [...],
'comments': [
{'line': 5, 'text': 'TODO: Optimize', 'type': 'inline'},
{'line': 12, 'text': 'Block comment\nwith lines', 'type': 'block'}
]
}
Tests:
- Added 8 comprehensive tests to test_code_analyzer.py
- Total: 30 tests passing ✅
- Python: Comment extraction, line numbers, shebang skip
- JavaScript: Inline comments, block comments, mixed
- C++: Comment extraction (uses JS logic)
- TODO/FIXME detection test
Related Issues:
- Closes #67 (C2.5 Extract inline comments as notes)
- Part of C2 Local Codebase Scraping roadmap (TIER 3)
Files Modified:
- src/skill_seekers/cli/code_analyzer.py (+67 lines)
- tests/test_code_analyzer.py (+194 lines)
This commit is contained in:
@@ -549,6 +549,198 @@ def main_func():
|
||||
self.assertIn('ClassB', class_names)
|
||||
|
||||
|
||||
class TestCommentExtraction(unittest.TestCase):
|
||||
"""Tests for comment extraction"""
|
||||
|
||||
def setUp(self):
|
||||
"""Set up test analyzer with deep analysis"""
|
||||
self.analyzer = CodeAnalyzer(depth='deep')
|
||||
|
||||
def test_python_comment_extraction(self):
|
||||
"""Test Python # comment extraction."""
|
||||
code = '''
|
||||
# This is a comment
|
||||
def test_func():
|
||||
# Inside function comment
|
||||
x = 5 # Inline comment (not extracted due to code on same line)
|
||||
return x
|
||||
|
||||
# Another top-level comment
|
||||
class TestClass:
|
||||
# Class-level comment
|
||||
pass
|
||||
'''
|
||||
result = self.analyzer.analyze_file('test.py', code, 'Python')
|
||||
|
||||
self.assertIn('comments', result)
|
||||
comments = result['comments']
|
||||
|
||||
# Should have extracted standalone comments
|
||||
self.assertGreaterEqual(len(comments), 3)
|
||||
|
||||
# Check comment content
|
||||
comment_texts = [c['text'] for c in comments]
|
||||
self.assertIn('This is a comment', comment_texts)
|
||||
self.assertIn('Inside function comment', comment_texts)
|
||||
self.assertIn('Another top-level comment', comment_texts)
|
||||
|
||||
# Check all are inline type
|
||||
for comment in comments:
|
||||
self.assertEqual(comment['type'], 'inline')
|
||||
|
||||
def test_python_comment_line_numbers(self):
|
||||
"""Test Python comment line number tracking."""
|
||||
code = '''# Line 1 comment
|
||||
def func():
|
||||
# Line 3 comment
|
||||
pass
|
||||
# Line 5 comment
|
||||
'''
|
||||
result = self.analyzer.analyze_file('test.py', code, 'Python')
|
||||
|
||||
comments = result['comments']
|
||||
self.assertEqual(len(comments), 3)
|
||||
|
||||
# Check line numbers
|
||||
line_nums = [c['line'] for c in comments]
|
||||
self.assertIn(1, line_nums)
|
||||
self.assertIn(3, line_nums)
|
||||
self.assertIn(5, line_nums)
|
||||
|
||||
def test_python_skip_shebang_and_encoding(self):
|
||||
"""Test that shebang and encoding declarations are skipped."""
|
||||
code = '''#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
# This is a real comment
|
||||
def func():
|
||||
pass
|
||||
'''
|
||||
result = self.analyzer.analyze_file('test.py', code, 'Python')
|
||||
|
||||
comments = result['comments']
|
||||
|
||||
# Should only have the real comment
|
||||
self.assertEqual(len(comments), 1)
|
||||
self.assertEqual(comments[0]['text'], 'This is a real comment')
|
||||
|
||||
def test_javascript_inline_comments(self):
|
||||
"""Test JavaScript // comment extraction."""
|
||||
code = '''
|
||||
// Top-level comment
|
||||
function test() {
|
||||
// Inside function
|
||||
const x = 5; // Inline (not extracted)
|
||||
return x;
|
||||
}
|
||||
|
||||
// Another comment
|
||||
const y = 10;
|
||||
'''
|
||||
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
|
||||
|
||||
self.assertIn('comments', result)
|
||||
comments = result['comments']
|
||||
|
||||
# Should have extracted standalone comments
|
||||
self.assertGreaterEqual(len(comments), 3)
|
||||
|
||||
# Check comment types
|
||||
inline_comments = [c for c in comments if c['type'] == 'inline']
|
||||
self.assertGreaterEqual(len(inline_comments), 3)
|
||||
|
||||
def test_javascript_block_comments(self):
|
||||
"""Test JavaScript /* */ block comment extraction."""
|
||||
code = '''
|
||||
/* This is a
|
||||
multi-line
|
||||
block comment */
|
||||
function test() {
|
||||
/* Another block comment */
|
||||
return 42;
|
||||
}
|
||||
'''
|
||||
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
|
||||
|
||||
comments = result['comments']
|
||||
|
||||
# Should have extracted block comments
|
||||
block_comments = [c for c in comments if c['type'] == 'block']
|
||||
self.assertGreaterEqual(len(block_comments), 2)
|
||||
|
||||
# Check multi-line content is preserved
|
||||
first_block = next(c for c in comments if 'multi-line' in c['text'])
|
||||
self.assertIn('multi-line', first_block['text'])
|
||||
|
||||
def test_javascript_mixed_comments(self):
|
||||
"""Test JavaScript mixed inline and block comments."""
|
||||
code = '''
|
||||
// Inline comment
|
||||
/* Block comment */
|
||||
function test() {
|
||||
// Another inline
|
||||
/* Another block */
|
||||
return true;
|
||||
}
|
||||
'''
|
||||
result = self.analyzer.analyze_file('test.js', code, 'JavaScript')
|
||||
|
||||
comments = result['comments']
|
||||
|
||||
# Should have both types
|
||||
inline_comments = [c for c in comments if c['type'] == 'inline']
|
||||
block_comments = [c for c in comments if c['type'] == 'block']
|
||||
|
||||
self.assertGreaterEqual(len(inline_comments), 2)
|
||||
self.assertGreaterEqual(len(block_comments), 2)
|
||||
|
||||
def test_cpp_comment_extraction(self):
|
||||
"""Test C++ comment extraction (uses same logic as JavaScript)."""
|
||||
code = '''
|
||||
// Header comment
|
||||
class Node {
|
||||
public:
|
||||
// Method comment
|
||||
void update();
|
||||
|
||||
/* Block comment for data member */
|
||||
int value;
|
||||
};
|
||||
'''
|
||||
result = self.analyzer.analyze_file('test.h', code, 'C++')
|
||||
|
||||
self.assertIn('comments', result)
|
||||
comments = result['comments']
|
||||
|
||||
# Should have extracted comments
|
||||
self.assertGreaterEqual(len(comments), 3)
|
||||
|
||||
# Check both inline and block
|
||||
inline_comments = [c for c in comments if c['type'] == 'inline']
|
||||
block_comments = [c for c in comments if c['type'] == 'block']
|
||||
|
||||
self.assertGreaterEqual(len(inline_comments), 2)
|
||||
self.assertGreaterEqual(len(block_comments), 1)
|
||||
|
||||
def test_todo_fixme_comment_detection(self):
|
||||
"""Test that TODO/FIXME comments are extracted."""
|
||||
code = '''
|
||||
# TODO: Implement this feature
|
||||
def incomplete_func():
|
||||
# FIXME: Handle edge case
|
||||
pass
|
||||
|
||||
# NOTE: Important information
|
||||
'''
|
||||
result = self.analyzer.analyze_file('test.py', code, 'Python')
|
||||
|
||||
comments = result['comments']
|
||||
|
||||
comment_texts = [c['text'] for c in comments]
|
||||
self.assertTrue(any('TODO' in text for text in comment_texts))
|
||||
self.assertTrue(any('FIXME' in text for text in comment_texts))
|
||||
self.assertTrue(any('NOTE' in text for text in comment_texts))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Run tests with verbose output
|
||||
unittest.main(verbosity=2)
|
||||
|
||||
Reference in New Issue
Block a user