feat(C2.5): Add inline comment extraction for Python/JS/C++
- Added comment extraction methods to code_analyzer.py
- Supports Python (# style), JavaScript (// and /* */), C++ (// and /* */)
- Extracts comment text, line numbers, and type (inline vs block)
- Skips Python shebang and encoding declarations
- Preserves TODO/FIXME/NOTE markers for developer notes
Implementation:
- _extract_python_comments(): Extract # comments with line tracking
- _extract_js_comments(): Extract // and /* */ comments
- _extract_cpp_comments(): Reuses JS logic (same syntax)
- Integrated into _analyze_python(), _analyze_javascript(), _analyze_cpp()
Output Format:
{
'classes': [...],
'functions': [...],
'comments': [
{'line': 5, 'text': 'TODO: Optimize', 'type': 'inline'},
{'line': 12, 'text': 'Block comment\nwith lines', 'type': 'block'}
]
}
Tests:
- Added 8 comprehensive tests to test_code_analyzer.py
- Total: 30 tests passing ✅
- Python: Comment extraction, line numbers, shebang skip
- JavaScript: Inline comments, block comments, mixed
- C++: Comment extraction (uses JS logic)
- TODO/FIXME detection test
Related Issues:
- Closes #67 (C2.5 Extract inline comments as notes)
- Part of C2 Local Codebase Scraping roadmap (TIER 3)
Files Modified:
- src/skill_seekers/cli/code_analyzer.py (+67 lines)
- tests/test_code_analyzer.py (+194 lines)
This commit is contained in:
@@ -131,9 +131,13 @@ class CodeAnalyzer:
|
||||
func_sig = self._extract_python_function(node)
|
||||
functions.append(asdict(func_sig))
|
||||
|
||||
# Extract comments
|
||||
comments = self._extract_python_comments(content)
|
||||
|
||||
return {
|
||||
'classes': classes,
|
||||
'functions': functions
|
||||
'functions': functions,
|
||||
'comments': comments
|
||||
}
|
||||
|
||||
def _extract_python_class(self, node: ast.ClassDef) -> ClassSignature:
|
||||
@@ -298,9 +302,13 @@ class CodeAnalyzer:
|
||||
'decorators': []
|
||||
})
|
||||
|
||||
# Extract comments
|
||||
comments = self._extract_js_comments(content)
|
||||
|
||||
return {
|
||||
'classes': classes,
|
||||
'functions': functions
|
||||
'functions': functions,
|
||||
'comments': comments
|
||||
}
|
||||
|
||||
def _extract_js_methods(self, class_body: str) -> List[Dict]:
|
||||
@@ -419,9 +427,13 @@ class CodeAnalyzer:
|
||||
'decorators': []
|
||||
})
|
||||
|
||||
# Extract comments
|
||||
comments = self._extract_cpp_comments(content)
|
||||
|
||||
return {
|
||||
'classes': classes,
|
||||
'functions': functions
|
||||
'functions': functions,
|
||||
'comments': comments
|
||||
}
|
||||
|
||||
def _parse_cpp_parameters(self, params_str: str) -> List[Dict]:
|
||||
@@ -463,6 +475,73 @@ class CodeAnalyzer:
|
||||
|
||||
return params
|
||||
|
||||
def _extract_python_comments(self, content: str) -> List[Dict]:
|
||||
"""
|
||||
Extract Python comments (# style).
|
||||
|
||||
Returns list of comment dictionaries with line number, text, and type.
|
||||
"""
|
||||
comments = []
|
||||
|
||||
for i, line in enumerate(content.splitlines(), 1):
|
||||
stripped = line.strip()
|
||||
|
||||
# Skip shebang and encoding declarations
|
||||
if stripped.startswith('#!') or stripped.startswith('#') and 'coding' in stripped:
|
||||
continue
|
||||
|
||||
# Extract regular comments
|
||||
if stripped.startswith('#'):
|
||||
comment_text = stripped[1:].strip()
|
||||
comments.append({
|
||||
'line': i,
|
||||
'text': comment_text,
|
||||
'type': 'inline'
|
||||
})
|
||||
|
||||
return comments
|
||||
|
||||
def _extract_js_comments(self, content: str) -> List[Dict]:
|
||||
"""
|
||||
Extract JavaScript/TypeScript comments (// and /* */ styles).
|
||||
|
||||
Returns list of comment dictionaries with line number, text, and type.
|
||||
"""
|
||||
comments = []
|
||||
|
||||
# Extract single-line comments (//)
|
||||
for match in re.finditer(r'//(.+)$', content, re.MULTILINE):
|
||||
line_num = content[:match.start()].count('\n') + 1
|
||||
comment_text = match.group(1).strip()
|
||||
|
||||
comments.append({
|
||||
'line': line_num,
|
||||
'text': comment_text,
|
||||
'type': 'inline'
|
||||
})
|
||||
|
||||
# Extract multi-line comments (/* */)
|
||||
for match in re.finditer(r'/\*(.+?)\*/', content, re.DOTALL):
|
||||
start_line = content[:match.start()].count('\n') + 1
|
||||
comment_text = match.group(1).strip()
|
||||
|
||||
comments.append({
|
||||
'line': start_line,
|
||||
'text': comment_text,
|
||||
'type': 'block'
|
||||
})
|
||||
|
||||
return comments
|
||||
|
||||
def _extract_cpp_comments(self, content: str) -> List[Dict]:
    """
    Extract C++ comments (// line comments and /* */ block comments).

    Delegates to the JavaScript extractor, since both languages share
    these two comment forms.

    Returns whatever ``_extract_js_comments`` returns for ``content``.
    """
    # Same comment syntax as JavaScript, so reuse that extractor as-is.
    shared_extractor = self._extract_js_comments
    return shared_extractor(content)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Test the analyzer
|
||||
|
||||
Reference in New Issue
Block a user