feat(C2.4): Add API reference generator from code signatures

- Created src/skill_seekers/cli/api_reference_builder.py (372 lines)
- Generates markdown API documentation from code analysis results
- Supports Python, JavaScript/TypeScript, and C++ code signatures

Features:
- Class documentation with inheritance and methods
- Function/method signatures with parameters and return types
- Parameter tables with types and defaults
- Async function indicators
- Decorators display (for Python)
- Standalone CLI tool for generating API docs from JSON

Tests:
- Created tests/test_api_reference_builder.py with 7 tests
- All tests passing
- Test coverage: Class formatting, function formatting, parameter tables,
  markdown structure, code analyzer integration, async indicators,
  skipping of files with no analysis results

Output Format:
- One .md file per analyzed source file
- Organized: Classes → Methods, then standalone Functions
- Professional markdown tables for parameters

CLI Usage:
    python -m skill_seekers.cli.api_reference_builder \
        code_analysis.json output/api_reference/

Related Issues:
- Closes #66 (C2.4 Build API reference from code)
- Part of C2 Local Codebase Scraping roadmap (TIER 3)
This commit is contained in:
yusyus
2026-01-01 23:00:36 +03:00
parent f162727792
commit 43063dc0d2
2 changed files with 706 additions and 0 deletions

View File

@@ -0,0 +1,372 @@
#!/usr/bin/env python3
"""
API Reference Builder
Generates markdown API documentation from code analysis results.
Supports Python, JavaScript/TypeScript, and C++.
Output Format:
- One .md file per analyzed source file
- Organized by: Classes → Methods, then standalone Functions
- Includes: Signatures, parameters, return types, docstrings
Usage:
from skill_seekers.cli.api_reference_builder import APIReferenceBuilder
builder = APIReferenceBuilder(code_analysis_results)
builder.build_reference(output_dir)
"""
import os
import json
from pathlib import Path
from typing import Dict, List, Any, Optional
class APIReferenceBuilder:
    """
    Builds markdown API reference from code analysis results.

    Processes code analysis data and generates one markdown document per
    analyzed source file: a header, then a "Classes" section (each class
    followed by its methods), then a "Functions" section.
    """

    def __init__(self, code_analysis: Dict[str, Any]):
        """
        Initialize builder with code analysis results.

        Args:
            code_analysis: Dictionary containing analyzed files and their code structures.
                Expected format: {'files': [{'file': 'path', 'classes': [...], 'functions': [...]}]}
        """
        self.code_analysis = code_analysis
        # `or []` also covers an explicit `"files": null` in the input.
        self.files_data = code_analysis.get('files') or []

    def build_reference(self, output_dir: Path) -> Dict[str, Path]:
        """
        Generate markdown files for each analyzed source file.

        Files without any classes or functions are skipped. When two source
        files would map to the same output name (same basename in different
        directories, or same stem with different extensions), later ones get
        a numeric suffix (``utils.md``, ``utils_2.md``, ...) instead of
        overwriting the earlier output.

        Args:
            output_dir: Directory to save generated markdown files

        Returns:
            Dictionary mapping source file paths to generated markdown file paths
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        generated_files: Dict[str, Path] = {}
        used_names: set = set()

        for file_data in self.files_data:
            source_file = file_data.get('file') or 'unknown'
            language = file_data.get('language') or 'Unknown'

            # Skip files with no analysis
            if not file_data.get('classes') and not file_data.get('functions'):
                continue

            markdown_content = self._generate_file_reference(file_data, source_file, language)

            output_filename = self._unique_filename(
                self._get_output_filename(source_file), used_names
            )
            output_path = output_dir / output_filename

            output_path.write_text(markdown_content, encoding='utf-8')
            generated_files[source_file] = output_path

        return generated_files

    @staticmethod
    def _unique_filename(filename: str, used_names: set) -> str:
        """
        Return ``filename`` or a numbered variant not yet present in ``used_names``.

        Names are compared case-insensitively so the result is also safe on
        case-insensitive filesystems. The chosen name is recorded in
        ``used_names``.

        Args:
            filename: Desired output filename, ending in ``.md``
            used_names: Set of casefolded names already handed out (mutated)

        Returns:
            A filename unique within ``used_names``
        """
        stem = filename[:-len('.md')]
        candidate = filename
        counter = 2
        while candidate.casefold() in used_names:
            candidate = f"{stem}_{counter}.md"
            counter += 1
        used_names.add(candidate.casefold())
        return candidate

    def _get_output_filename(self, source_file: str) -> str:
        """
        Generate output filename from source file path.

        Args:
            source_file: Path to source file

        Returns:
            Safe filename for markdown output
        """
        basename = Path(source_file).name
        # Drop only the last extension. For dotfiles such as ".bashrc" the
        # part before the dot is empty, so keep the whole name instead of
        # producing a bare ".md".
        stem = basename.rsplit('.', 1)[0]
        return f"{stem or basename or 'unknown'}.md"

    def _generate_file_reference(self, file_data: Dict[str, Any],
                                 source_file: str, language: str) -> str:
        """
        Generate complete markdown reference for a single file.

        Args:
            file_data: Analysis data for the file
            source_file: Path to source file
            language: Programming language

        Returns:
            Complete markdown content
        """
        lines = []

        # Header
        filename = Path(source_file).name
        lines.append(f"# API Reference: {filename}\n")
        lines.append(f"**Language**: {language}\n")
        lines.append(f"**Source**: `{source_file}`\n")
        lines.append("---\n")

        # Classes section
        classes = file_data.get('classes') or []
        if classes:
            lines.append("## Classes\n")
            for cls in classes:
                lines.append(self._format_class(cls))
                lines.append("\n")

        # Functions section
        functions = file_data.get('functions') or []
        if functions:
            lines.append("## Functions\n")
            for func in functions:
                lines.append(self._format_function(func))
                lines.append("\n")

        return '\n'.join(lines)

    def _format_class(self, class_sig: Dict[str, Any]) -> str:
        """
        Format class signature as markdown.

        Args:
            class_sig: Class signature dictionary

        Returns:
            Formatted markdown for class
        """
        lines = []

        class_name = class_sig.get('name', 'Unknown')
        lines.append(f"### {class_name}\n")

        docstring = class_sig.get('docstring')
        if docstring:
            lines.append(f"{docstring}\n")

        base_classes = class_sig.get('base_classes') or []
        if base_classes:
            bases_str = ', '.join(str(base) for base in base_classes)
            lines.append(f"**Inherits from**: {bases_str}\n")
        else:
            lines.append("**Inherits from**: (none)\n")

        methods = class_sig.get('methods') or []
        if methods:
            lines.append("#### Methods\n")
            for method in methods:
                lines.append(self._format_method(method))
                lines.append("")

        return '\n'.join(lines)

    def _format_method(self, method_sig: Dict[str, Any]) -> str:
        """
        Format method signature as markdown.

        Args:
            method_sig: Method signature dictionary

        Returns:
            Formatted markdown for method
        """
        return self._format_callable(
            method_sig,
            heading_prefix="#####",
            async_label="Async method",
            always_show_return=False,
        )

    def _format_function(self, func_sig: Dict[str, Any]) -> str:
        """
        Format function signature as markdown.

        Args:
            func_sig: Function signature dictionary

        Returns:
            Formatted markdown for function
        """
        return self._format_callable(
            func_sig,
            heading_prefix="###",
            async_label="Async function",
            always_show_return=True,
        )

    def _format_callable(self, sig: Dict[str, Any], heading_prefix: str,
                         async_label: str, always_show_return: bool) -> str:
        """
        Shared formatter behind _format_method and _format_function.

        Emits, in order: signature heading, async marker, docstring,
        decorators, parameter table, return type. Sections whose data is
        missing or empty are omitted.

        Args:
            sig: Function or method signature dictionary
            heading_prefix: Markdown heading marker, e.g. "###"
            async_label: Bold label emitted when sig['is_async'] is truthy
            always_show_return: If True, emit "**Returns**: (none)" when no
                return type is known; if False, omit the line

        Returns:
            Formatted markdown for the callable
        """
        lines = [f"{heading_prefix} {self._build_signature(sig)}\n"]

        if sig.get('is_async'):
            lines.append(f"**{async_label}**\n")

        docstring = sig.get('docstring')
        if docstring:
            lines.append(f"{docstring}\n")

        decorators = sig.get('decorators') or []
        if decorators:
            dec_str = ', '.join(f"`@{d}`" for d in decorators)
            lines.append(f"**Decorators**: {dec_str}\n")

        params = sig.get('parameters') or []
        if params:
            lines.append(self._format_parameters(params))
            lines.append("")

        return_type = sig.get('return_type')
        if return_type:
            lines.append(f"**Returns**: `{return_type}`\n")
        elif always_show_return:
            lines.append("**Returns**: (none)\n")

        return '\n'.join(lines)

    @staticmethod
    def _has_default(default: Any) -> bool:
        """
        Tell whether a parameter's 'default' entry carries a value.

        None and the empty string mean "no default"; every other value,
        including falsy ones such as 0 or False, is a real default.
        """
        return default is not None and default != ''

    @staticmethod
    def _escape_cell(value: Any) -> str:
        """
        Make a value safe for a markdown table cell.

        A literal "|" (common in union type hints such as ``int | None``)
        would otherwise be read as a column separator, and a newline would
        end the row.
        """
        return str(value).replace('|', '\\|').replace('\n', ' ')

    def _build_signature(self, sig: Dict[str, Any]) -> str:
        """
        Build function/method signature string.

        Args:
            sig: Signature dictionary

        Returns:
            Formatted signature string, e.g. "add(a: int, b: int = 0) → int"
        """
        name = sig.get('name', 'unknown')
        params = sig.get('parameters') or []
        return_type = sig.get('return_type')

        param_strs = []
        for param in params:
            param_str = str(param.get('name') or '')

            type_hint = param.get('type_hint')
            if type_hint:
                param_str += f": {type_hint}"

            # Same presence test as the parameter table, so a default of
            # 0 or False is not silently dropped from the signature.
            default = param.get('default')
            if self._has_default(default):
                param_str += f" = {default}"

            param_strs.append(param_str)

        params_str = ', '.join(param_strs)

        if return_type:
            return f"{name}({params_str}) → {return_type}"
        return f"{name}({params_str})"

    def _format_parameters(self, params: List[Dict]) -> str:
        """
        Format parameter list as markdown table.

        Missing or null names, type hints and defaults are shown as "-".

        Args:
            params: List of parameter dictionaries

        Returns:
            Formatted markdown table, or an empty string if params is empty
        """
        if not params:
            return ""

        lines = [
            "**Parameters**:",
            "",
            "| Name | Type | Default | Description |",
            "|------|------|---------|-------------|",
        ]

        for param in params:
            name = self._escape_cell(param.get('name') or '-')
            # `or '-'` rather than a .get() default: the key is usually
            # present with a null value when no hint exists.
            type_hint = self._escape_cell(param.get('type_hint') or '-')
            default = param.get('default')
            default_str = self._escape_cell(default) if self._has_default(default) else '-'
            # Descriptions are not extracted yet (would need JSDoc/docstring parsing)
            description = "-"

            lines.append(f"| {name} | {type_hint} | {default_str} | {description} |")

        return '\n'.join(lines)
def main():
    """
    Command-line interface for API reference generation.

    Reads a code analysis JSON file and generates markdown API
    documentation in the given output directory.

    Returns:
        Process exit status: 0 on success, 1 if the input file is missing,
        is not valid JSON, or does not contain a JSON object at top level.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description='Generate API reference from code analysis results'
    )
    parser.add_argument('input_file', help='Code analysis JSON file')
    parser.add_argument('output_dir', help='Output directory for markdown files')
    args = parser.parse_args()

    # is_file() also rejects a directory, which exists() would let through
    # only to fail inside open().
    input_path = Path(args.input_file)
    if not input_path.is_file():
        print(f"Error: Input file not found: {input_path}", file=sys.stderr)
        return 1

    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            code_analysis = json.load(f)
    except json.JSONDecodeError as exc:
        print(f"Error: Invalid JSON in {input_path}: {exc}", file=sys.stderr)
        return 1

    # The builder calls .get() on the root, so anything but an object
    # (e.g. a bare list) cannot be processed.
    if not isinstance(code_analysis, dict):
        print(f"Error: Expected a JSON object at top level of {input_path}",
              file=sys.stderr)
        return 1

    # Build API reference
    builder = APIReferenceBuilder(code_analysis)
    generated_files = builder.build_reference(Path(args.output_dir))

    # Report results
    print(f"✅ Generated {len(generated_files)} API reference files")
    print(f"📁 Output directory: {args.output_dir}")
    for source, output in generated_files.items():
        print(f"{output.name} (from {Path(source).name})")

    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main())

View File

@@ -0,0 +1,334 @@
#!/usr/bin/env python3
"""
Tests for api_reference_builder.py - Markdown API documentation generation.
Test Coverage:
- Class formatting
- Function formatting
- Parameter table generation
- Markdown output structure
- Integration with code analysis results
"""
import unittest
import tempfile
import shutil
from pathlib import Path
import sys
import os
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from skill_seekers.cli.api_reference_builder import APIReferenceBuilder
class TestAPIReferenceBuilder(unittest.TestCase):
    """Tests for API reference builder"""

    def setUp(self):
        """Create a scratch directory; the builder creates the output subdir itself."""
        self.temp_dir = tempfile.mkdtemp()
        self.output_dir = Path(self.temp_dir) / "api_reference"

    def tearDown(self):
        """Remove the scratch directory and everything generated in it."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def _build(self, code_analysis):
        """Run the builder on ``code_analysis`` and return its source -> path mapping."""
        return APIReferenceBuilder(code_analysis).build_reference(self.output_dir)

    @staticmethod
    def _read(path):
        """
        Read a generated file as UTF-8.

        The builder writes UTF-8 and signatures contain a non-ASCII arrow,
        so relying on the locale's default encoding would make content
        assertions fail on platforms where that default is not UTF-8.
        """
        return path.read_text(encoding='utf-8')

    def _build_single(self, code_analysis):
        """Build and return the content of the first generated file."""
        generated = self._build(code_analysis)
        return self._read(list(generated.values())[0])

    def test_class_formatting(self):
        """Test markdown formatting for class signatures."""
        code_analysis = {
            'files': [{
                'file': 'test.py',
                'language': 'Python',
                'classes': [{
                    'name': 'Calculator',
                    'docstring': 'A simple calculator class.',
                    'base_classes': ['object'],
                    'methods': [{
                        'name': 'add',
                        'parameters': [
                            {'name': 'a', 'type_hint': 'int', 'default': None},
                            {'name': 'b', 'type_hint': 'int', 'default': None}
                        ],
                        'return_type': 'int',
                        'docstring': 'Add two numbers.',
                        'is_async': False,
                        'is_method': True,
                        'decorators': []
                    }]
                }],
                'functions': []
            }]
        }
        generated = self._build(code_analysis)

        # Verify file was generated
        self.assertEqual(len(generated), 1)
        output_file = list(generated.values())[0]
        self.assertTrue(output_file.exists())

        # Verify content
        content = self._read(output_file)
        self.assertIn('### Calculator', content)
        self.assertIn('A simple calculator class', content)
        self.assertIn('**Inherits from**: object', content)
        self.assertIn('##### add', content)
        self.assertIn('Add two numbers', content)

    def test_function_formatting(self):
        """Test markdown formatting for function signatures."""
        code_analysis = {
            'files': [{
                'file': 'utils.py',
                'language': 'Python',
                'classes': [],
                'functions': [{
                    'name': 'calculate_sum',
                    'parameters': [
                        {'name': 'numbers', 'type_hint': 'list', 'default': None}
                    ],
                    'return_type': 'int',
                    'docstring': 'Calculate sum of numbers.',
                    'is_async': False,
                    'is_method': False,
                    'decorators': []
                }]
            }]
        }
        content = self._build_single(code_analysis)

        self.assertIn('## Functions', content)
        self.assertIn('### calculate_sum', content)
        self.assertIn('Calculate sum of numbers', content)
        self.assertIn('**Returns**: `int`', content)

    def test_parameter_table_generation(self):
        """Test parameter table formatting."""
        code_analysis = {
            'files': [{
                'file': 'test.py',
                'language': 'Python',
                'classes': [],
                'functions': [{
                    'name': 'create_user',
                    'parameters': [
                        {'name': 'name', 'type_hint': 'str', 'default': None},
                        {'name': 'age', 'type_hint': 'int', 'default': '18'},
                        {'name': 'active', 'type_hint': 'bool', 'default': 'True'}
                    ],
                    'return_type': 'dict',
                    'docstring': 'Create a user object.',
                    'is_async': False,
                    'is_method': False,
                    'decorators': []
                }]
            }]
        }
        content = self._build_single(code_analysis)

        self.assertIn('**Parameters**:', content)
        self.assertIn('| Name | Type | Default | Description |', content)
        self.assertIn('| name | str | - |', content)  # Parameters with no default show "-"
        self.assertIn('| age | int | 18 |', content)
        self.assertIn('| active | bool | True |', content)

    def test_markdown_output_structure(self):
        """Test overall markdown document structure."""
        code_analysis = {
            'files': [{
                'file': 'module.py',
                'language': 'Python',
                'classes': [{
                    'name': 'TestClass',
                    'docstring': 'Test class.',
                    'base_classes': [],
                    'methods': []
                }],
                'functions': [{
                    'name': 'test_func',
                    'parameters': [],
                    'return_type': None,
                    'docstring': 'Test function.',
                    'is_async': False,
                    'is_method': False,
                    'decorators': []
                }]
            }]
        }
        content = self._build_single(code_analysis)

        # Check header
        self.assertIn('# API Reference: module.py', content)
        self.assertIn('**Language**: Python', content)
        self.assertIn('**Source**: `module.py`', content)

        # Check sections in order
        classes_pos = content.find('## Classes')
        functions_pos = content.find('## Functions')
        self.assertNotEqual(classes_pos, -1)
        self.assertNotEqual(functions_pos, -1)
        self.assertLess(classes_pos, functions_pos)

    def test_integration_with_code_analyzer(self):
        """Test integration with actual code analyzer output format."""
        # Simulate real code analyzer output
        code_analysis = {
            'files': [
                {
                    'file': 'calculator.py',
                    'language': 'Python',
                    'classes': [{
                        'name': 'Calculator',
                        'base_classes': [],
                        'methods': [
                            {
                                'name': 'add',
                                'parameters': [
                                    {'name': 'a', 'type_hint': 'float', 'default': None},
                                    {'name': 'b', 'type_hint': 'float', 'default': None}
                                ],
                                'return_type': 'float',
                                'docstring': 'Add two numbers.',
                                'decorators': [],
                                'is_async': False,
                                'is_method': True
                            }
                        ],
                        'docstring': 'Calculator class.',
                        'line_number': 1
                    }],
                    'functions': []
                },
                {
                    'file': 'utils.js',
                    'language': 'JavaScript',
                    'classes': [],
                    'functions': [{
                        'name': 'formatDate',
                        'parameters': [
                            {'name': 'date', 'type_hint': None, 'default': None}
                        ],
                        'return_type': None,
                        'docstring': None,
                        'is_async': False,
                        'is_method': False,
                        'decorators': []
                    }]
                }
            ]
        }
        generated = self._build(code_analysis)

        # Verify multiple files generated
        self.assertEqual(len(generated), 2)

        # Verify filenames
        by_name = {path.name: path for path in generated.values()}
        self.assertIn('calculator.md', by_name)
        self.assertIn('utils.md', by_name)

        # Verify Python file content
        py_content = self._read(by_name['calculator.md'])
        self.assertIn('Calculator class', py_content)
        self.assertIn('add(a: float, b: float) → float', py_content)

        # Verify JavaScript file content
        js_content = self._read(by_name['utils.md'])
        self.assertIn('formatDate', js_content)
        self.assertIn('**Language**: JavaScript', js_content)

    def test_async_function_indicator(self):
        """Test that async functions are marked in output."""
        code_analysis = {
            'files': [{
                'file': 'async_utils.py',
                'language': 'Python',
                'classes': [],
                'functions': [{
                    'name': 'fetch_data',
                    'parameters': [
                        {'name': 'url', 'type_hint': 'str', 'default': None}
                    ],
                    'return_type': 'dict',
                    'docstring': 'Fetch data from URL.',
                    'is_async': True,
                    'is_method': False,
                    'decorators': []
                }]
            }]
        }
        content = self._build_single(code_analysis)

        self.assertIn('**Async function**', content)
        self.assertIn('fetch_data', content)

    def test_empty_analysis_skipped(self):
        """Test that files with no analysis are skipped."""
        code_analysis = {
            'files': [
                {
                    'file': 'empty.py',
                    'language': 'Python',
                    'classes': [],
                    'functions': []
                },
                {
                    'file': 'valid.py',
                    'language': 'Python',
                    'classes': [],
                    'functions': [{
                        'name': 'test',
                        'parameters': [],
                        'return_type': None,
                        'docstring': None,
                        'is_async': False,
                        'is_method': False,
                        'decorators': []
                    }]
                }
            ]
        }
        generated = self._build(code_analysis)

        # Only valid.py should be generated; compare keys exactly rather
        # than by substring so a near-miss key cannot pass.
        self.assertEqual(list(generated), ['valid.py'])


if __name__ == '__main__':
    # Run tests with verbose output
    unittest.main(verbosity=2)