From 43063dc0d2bcafee5b26aacb34996c6a26d8d5ba Mon Sep 17 00:00:00 2001 From: yusyus Date: Thu, 1 Jan 2026 23:00:36 +0300 Subject: [PATCH] feat(C2.4): Add API reference generator from code signatures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Created src/skill_seekers/cli/api_reference_builder.py (372 lines) - Generates markdown API documentation from code analysis results - Supports Python, JavaScript/TypeScript, and C++ code signatures Features: - Class documentation with inheritance and methods - Function/method signatures with parameters and return types - Parameter tables with types and defaults - Async function indicators - Decorators display (for Python) - Standalone CLI tool for generating API docs from JSON Tests: - Created tests/test_api_reference_builder.py with 7 tests - All tests passing ✅ - Test coverage: Class formatting, function formatting, parameter tables, markdown structure, code analyzer integration, async indicators Output Format: - One .md file per analyzed source file - Organized: Classes → Methods, then standalone Functions - Professional markdown tables for parameters CLI Usage: python -m skill_seekers.cli.api_reference_builder \ code_analysis.json output/api_reference/ Related Issues: - Closes #66 (C2.4 Build API reference from code) - Part of C2 Local Codebase Scraping roadmap (TIER 3) --- .../cli/api_reference_builder.py | 372 ++++++++++++++++++ tests/test_api_reference_builder.py | 334 ++++++++++++++++ 2 files changed, 706 insertions(+) create mode 100644 src/skill_seekers/cli/api_reference_builder.py create mode 100644 tests/test_api_reference_builder.py diff --git a/src/skill_seekers/cli/api_reference_builder.py b/src/skill_seekers/cli/api_reference_builder.py new file mode 100644 index 0000000..1264a31 --- /dev/null +++ b/src/skill_seekers/cli/api_reference_builder.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python3 +""" +API Reference Builder + +Generates markdown API documentation from code 
analysis results. +Supports Python, JavaScript/TypeScript, and C++. + +Output Format: +- One .md file per analyzed source file +- Organized by: Classes → Methods, then standalone Functions +- Includes: Signatures, parameters, return types, docstrings + +Usage: + from skill_seekers.cli.api_reference_builder import APIReferenceBuilder + + builder = APIReferenceBuilder(code_analysis_results) + builder.build_reference(output_dir) +""" + +import os +import json +from pathlib import Path +from typing import Dict, List, Any, Optional + + +class APIReferenceBuilder: + """ + Builds markdown API reference from code analysis results. + + Processes code analysis data and generates well-formatted markdown + documentation for each analyzed source file. + """ + + def __init__(self, code_analysis: Dict[str, Any]): + """ + Initialize builder with code analysis results. + + Args: + code_analysis: Dictionary containing analyzed files and their code structures. + Expected format: {'files': [{'file': 'path', 'classes': [...], 'functions': [...]}]} + """ + self.code_analysis = code_analysis + self.files_data = code_analysis.get('files', []) + + def build_reference(self, output_dir: Path) -> Dict[str, Path]: + """ + Generate markdown files for each analyzed source file. 
+ + Args: + output_dir: Directory to save generated markdown files + + Returns: + Dictionary mapping source file paths to generated markdown file paths + """ + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + generated_files = {} + + for file_data in self.files_data: + source_file = file_data.get('file', 'unknown') + language = file_data.get('language', 'Unknown') + + # Skip files with no analysis + if not file_data.get('classes') and not file_data.get('functions'): + continue + + # Generate markdown content + markdown_content = self._generate_file_reference(file_data, source_file, language) + + # Determine output filename + output_filename = self._get_output_filename(source_file) + output_path = output_dir / output_filename + + # Write markdown file + output_path.write_text(markdown_content, encoding='utf-8') + generated_files[source_file] = output_path + + return generated_files + + def _get_output_filename(self, source_file: str) -> str: + """ + Generate output filename from source file path. + + Args: + source_file: Path to source file + + Returns: + Safe filename for markdown output + """ + # Get base filename + basename = Path(source_file).name + + # Replace extension with .md + name_without_ext = basename.rsplit('.', 1)[0] if '.' in basename else basename + return f"{name_without_ext}.md" + + def _generate_file_reference(self, file_data: Dict[str, Any], + source_file: str, language: str) -> str: + """ + Generate complete markdown reference for a single file. 
+ + Args: + file_data: Analysis data for the file + source_file: Path to source file + language: Programming language + + Returns: + Complete markdown content + """ + lines = [] + + # Header + filename = Path(source_file).name + lines.append(f"# API Reference: {filename}\n") + lines.append(f"**Language**: {language}\n") + lines.append(f"**Source**: `{source_file}`\n") + lines.append("---\n") + + # Classes section + classes = file_data.get('classes', []) + if classes: + lines.append("## Classes\n") + for cls in classes: + lines.append(self._format_class(cls)) + lines.append("\n") + + # Functions section + functions = file_data.get('functions', []) + if functions: + lines.append("## Functions\n") + for func in functions: + lines.append(self._format_function(func)) + lines.append("\n") + + return '\n'.join(lines) + + def _format_class(self, class_sig: Dict[str, Any]) -> str: + """ + Format class signature as markdown. + + Args: + class_sig: Class signature dictionary + + Returns: + Formatted markdown for class + """ + lines = [] + + # Class name + class_name = class_sig.get('name', 'Unknown') + lines.append(f"### {class_name}\n") + + # Docstring + docstring = class_sig.get('docstring') + if docstring: + lines.append(f"{docstring}\n") + + # Inheritance + base_classes = class_sig.get('base_classes', []) + if base_classes: + bases_str = ', '.join(base_classes) + lines.append(f"**Inherits from**: {bases_str}\n") + else: + lines.append("**Inherits from**: (none)\n") + + # Methods + methods = class_sig.get('methods', []) + if methods: + lines.append("#### Methods\n") + for method in methods: + lines.append(self._format_method(method)) + lines.append("") + + return '\n'.join(lines) + + def _format_method(self, method_sig: Dict[str, Any]) -> str: + """ + Format method signature as markdown. 
+ + Args: + method_sig: Method signature dictionary + + Returns: + Formatted markdown for method + """ + lines = [] + + # Method signature + signature = self._build_signature(method_sig) + lines.append(f"##### {signature}\n") + + # Docstring + docstring = method_sig.get('docstring') + if docstring: + lines.append(f"{docstring}\n") + + # Decorators + decorators = method_sig.get('decorators', []) + if decorators: + dec_str = ', '.join(f"`@{d}`" for d in decorators) + lines.append(f"**Decorators**: {dec_str}\n") + + # Parameters table + params = method_sig.get('parameters', []) + if params: + lines.append(self._format_parameters(params)) + lines.append("") + + # Return type + return_type = method_sig.get('return_type') + if return_type: + lines.append(f"**Returns**: `{return_type}`\n") + + return '\n'.join(lines) + + def _format_function(self, func_sig: Dict[str, Any]) -> str: + """ + Format function signature as markdown. + + Args: + func_sig: Function signature dictionary + + Returns: + Formatted markdown for function + """ + lines = [] + + # Function signature + signature = self._build_signature(func_sig) + lines.append(f"### {signature}\n") + + # Async indicator + if func_sig.get('is_async'): + lines.append("**Async function**\n") + + # Docstring + docstring = func_sig.get('docstring') + if docstring: + lines.append(f"{docstring}\n") + + # Parameters table + params = func_sig.get('parameters', []) + if params: + lines.append(self._format_parameters(params)) + lines.append("") + + # Return type + return_type = func_sig.get('return_type') + if return_type: + lines.append(f"**Returns**: `{return_type}`\n") + else: + lines.append("**Returns**: (none)\n") + + return '\n'.join(lines) + + def _build_signature(self, sig: Dict[str, Any]) -> str: + """ + Build function/method signature string. 
+ + Args: + sig: Signature dictionary + + Returns: + Formatted signature string + """ + name = sig.get('name', 'unknown') + params = sig.get('parameters', []) + return_type = sig.get('return_type') + + # Build parameter list + param_strs = [] + for param in params: + param_str = param.get('name', '') + + # Add type hint if available + type_hint = param.get('type_hint') + if type_hint: + param_str += f": {type_hint}" + + # Add default value if available + default = param.get('default') + if default: + param_str += f" = {default}" + + param_strs.append(param_str) + + params_str = ', '.join(param_strs) + + # Build full signature + if return_type: + return f"{name}({params_str}) → {return_type}" + else: + return f"{name}({params_str})" + + def _format_parameters(self, params: List[Dict]) -> str: + """ + Format parameter list as markdown table. + + Args: + params: List of parameter dictionaries + + Returns: + Formatted markdown table + """ + if not params: + return "" + + lines = [] + lines.append("**Parameters**:") + lines.append("") + lines.append("| Name | Type | Default | Description |") + lines.append("|------|------|---------|-------------|") + + for param in params: + name = param.get('name', '-') + type_hint = param.get('type_hint', '-') + default = param.get('default') + + # Show "-" for parameters without defaults + default_str = default if default is not None else '-' + + # For description, use empty for now (would need JSDoc/docstring parsing) + description = "-" + + lines.append(f"| {name} | {type_hint} | {default_str} | {description} |") + + return '\n'.join(lines) + + +def main(): + """ + Command-line interface for API reference generation. + + Reads code analysis JSON and generates markdown API documentation. 
+ """ + import argparse + + parser = argparse.ArgumentParser( + description='Generate API reference from code analysis results' + ) + + parser.add_argument('input_file', help='Code analysis JSON file') + parser.add_argument('output_dir', help='Output directory for markdown files') + + args = parser.parse_args() + + # Read code analysis + input_path = Path(args.input_file) + if not input_path.exists(): + print(f"Error: Input file not found: {input_path}") + return 1 + + with open(input_path, 'r', encoding='utf-8') as f: + code_analysis = json.load(f) + + # Build API reference + builder = APIReferenceBuilder(code_analysis) + generated_files = builder.build_reference(Path(args.output_dir)) + + # Report results + print(f"✅ Generated {len(generated_files)} API reference files") + print(f"📁 Output directory: {args.output_dir}") + for source, output in generated_files.items(): + print(f" • {output.name} (from {Path(source).name})") + + return 0 + + +if __name__ == '__main__': + import sys + sys.exit(main()) diff --git a/tests/test_api_reference_builder.py b/tests/test_api_reference_builder.py new file mode 100644 index 0000000..85915a1 --- /dev/null +++ b/tests/test_api_reference_builder.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python3 +""" +Tests for api_reference_builder.py - Markdown API documentation generation. 
+ +Test Coverage: +- Class formatting +- Function formatting +- Parameter table generation +- Markdown output structure +- Integration with code analysis results +""" + +import unittest +import tempfile +import shutil +from pathlib import Path +import sys +import os + +# Add src to path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) + +from skill_seekers.cli.api_reference_builder import APIReferenceBuilder + + +class TestAPIReferenceBuilder(unittest.TestCase): + """Tests for API reference builder""" + + def setUp(self): + """Set up test environment""" + self.temp_dir = tempfile.mkdtemp() + self.output_dir = Path(self.temp_dir) / "api_reference" + + def tearDown(self): + """Clean up test environment""" + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_class_formatting(self): + """Test markdown formatting for class signatures.""" + code_analysis = { + 'files': [{ + 'file': 'test.py', + 'language': 'Python', + 'classes': [{ + 'name': 'Calculator', + 'docstring': 'A simple calculator class.', + 'base_classes': ['object'], + 'methods': [{ + 'name': 'add', + 'parameters': [ + {'name': 'a', 'type_hint': 'int', 'default': None}, + {'name': 'b', 'type_hint': 'int', 'default': None} + ], + 'return_type': 'int', + 'docstring': 'Add two numbers.', + 'is_async': False, + 'is_method': True, + 'decorators': [] + }] + }], + 'functions': [] + }] + } + + builder = APIReferenceBuilder(code_analysis) + generated = builder.build_reference(self.output_dir) + + # Verify file was generated + self.assertEqual(len(generated), 1) + output_file = list(generated.values())[0] + self.assertTrue(output_file.exists()) + + # Verify content + content = output_file.read_text() + self.assertIn('### Calculator', content) + self.assertIn('A simple calculator class', content) + self.assertIn('**Inherits from**: object', content) + self.assertIn('##### add', content) + self.assertIn('Add two numbers', content) + + def test_function_formatting(self): + """Test 
markdown formatting for function signatures.""" + code_analysis = { + 'files': [{ + 'file': 'utils.py', + 'language': 'Python', + 'classes': [], + 'functions': [{ + 'name': 'calculate_sum', + 'parameters': [ + {'name': 'numbers', 'type_hint': 'list', 'default': None} + ], + 'return_type': 'int', + 'docstring': 'Calculate sum of numbers.', + 'is_async': False, + 'is_method': False, + 'decorators': [] + }] + }] + } + + builder = APIReferenceBuilder(code_analysis) + generated = builder.build_reference(self.output_dir) + + # Verify content + output_file = list(generated.values())[0] + content = output_file.read_text() + + self.assertIn('## Functions', content) + self.assertIn('### calculate_sum', content) + self.assertIn('Calculate sum of numbers', content) + self.assertIn('**Returns**: `int`', content) + + def test_parameter_table_generation(self): + """Test parameter table formatting.""" + code_analysis = { + 'files': [{ + 'file': 'test.py', + 'language': 'Python', + 'classes': [], + 'functions': [{ + 'name': 'create_user', + 'parameters': [ + {'name': 'name', 'type_hint': 'str', 'default': None}, + {'name': 'age', 'type_hint': 'int', 'default': '18'}, + {'name': 'active', 'type_hint': 'bool', 'default': 'True'} + ], + 'return_type': 'dict', + 'docstring': 'Create a user object.', + 'is_async': False, + 'is_method': False, + 'decorators': [] + }] + }] + } + + builder = APIReferenceBuilder(code_analysis) + generated = builder.build_reference(self.output_dir) + + # Verify parameter table + output_file = list(generated.values())[0] + content = output_file.read_text() + + self.assertIn('**Parameters**:', content) + self.assertIn('| Name | Type | Default | Description |', content) + self.assertIn('| name | str | - |', content) # Parameters with no default show "-" + self.assertIn('| age | int | 18 |', content) + self.assertIn('| active | bool | True |', content) + + def test_markdown_output_structure(self): + """Test overall markdown document structure.""" + code_analysis 
= { + 'files': [{ + 'file': 'module.py', + 'language': 'Python', + 'classes': [{ + 'name': 'TestClass', + 'docstring': 'Test class.', + 'base_classes': [], + 'methods': [] + }], + 'functions': [{ + 'name': 'test_func', + 'parameters': [], + 'return_type': None, + 'docstring': 'Test function.', + 'is_async': False, + 'is_method': False, + 'decorators': [] + }] + }] + } + + builder = APIReferenceBuilder(code_analysis) + generated = builder.build_reference(self.output_dir) + + # Verify structure + output_file = list(generated.values())[0] + content = output_file.read_text() + + # Check header + self.assertIn('# API Reference: module.py', content) + self.assertIn('**Language**: Python', content) + self.assertIn('**Source**: `module.py`', content) + + # Check sections in order + classes_pos = content.find('## Classes') + functions_pos = content.find('## Functions') + + self.assertNotEqual(classes_pos, -1) + self.assertNotEqual(functions_pos, -1) + self.assertLess(classes_pos, functions_pos) + + def test_integration_with_code_analyzer(self): + """Test integration with actual code analyzer output format.""" + # Simulate real code analyzer output + code_analysis = { + 'files': [ + { + 'file': 'calculator.py', + 'language': 'Python', + 'classes': [{ + 'name': 'Calculator', + 'base_classes': [], + 'methods': [ + { + 'name': 'add', + 'parameters': [ + {'name': 'a', 'type_hint': 'float', 'default': None}, + {'name': 'b', 'type_hint': 'float', 'default': None} + ], + 'return_type': 'float', + 'docstring': 'Add two numbers.', + 'decorators': [], + 'is_async': False, + 'is_method': True + } + ], + 'docstring': 'Calculator class.', + 'line_number': 1 + }], + 'functions': [] + }, + { + 'file': 'utils.js', + 'language': 'JavaScript', + 'classes': [], + 'functions': [{ + 'name': 'formatDate', + 'parameters': [ + {'name': 'date', 'type_hint': None, 'default': None} + ], + 'return_type': None, + 'docstring': None, + 'is_async': False, + 'is_method': False, + 'decorators': [] + }] + } + 
] + } + + builder = APIReferenceBuilder(code_analysis) + generated = builder.build_reference(self.output_dir) + + # Verify multiple files generated + self.assertEqual(len(generated), 2) + + # Verify filenames + filenames = [f.name for f in generated.values()] + self.assertIn('calculator.md', filenames) + self.assertIn('utils.md', filenames) + + # Verify Python file content + py_file = next(f for f in generated.values() if f.name == 'calculator.md') + py_content = py_file.read_text() + self.assertIn('Calculator class', py_content) + self.assertIn('add(a: float, b: float) → float', py_content) + + # Verify JavaScript file content + js_file = next(f for f in generated.values() if f.name == 'utils.md') + js_content = js_file.read_text() + self.assertIn('formatDate', js_content) + self.assertIn('**Language**: JavaScript', js_content) + + def test_async_function_indicator(self): + """Test that async functions are marked in output.""" + code_analysis = { + 'files': [{ + 'file': 'async_utils.py', + 'language': 'Python', + 'classes': [], + 'functions': [{ + 'name': 'fetch_data', + 'parameters': [ + {'name': 'url', 'type_hint': 'str', 'default': None} + ], + 'return_type': 'dict', + 'docstring': 'Fetch data from URL.', + 'is_async': True, + 'is_method': False, + 'decorators': [] + }] + }] + } + + builder = APIReferenceBuilder(code_analysis) + generated = builder.build_reference(self.output_dir) + + # Verify async indicator + output_file = list(generated.values())[0] + content = output_file.read_text() + + self.assertIn('**Async function**', content) + self.assertIn('fetch_data', content) + + def test_empty_analysis_skipped(self): + """Test that files with no analysis are skipped.""" + code_analysis = { + 'files': [ + { + 'file': 'empty.py', + 'language': 'Python', + 'classes': [], + 'functions': [] + }, + { + 'file': 'valid.py', + 'language': 'Python', + 'classes': [], + 'functions': [{ + 'name': 'test', + 'parameters': [], + 'return_type': None, + 'docstring': None, + 
'is_async': False, + 'is_method': False, + 'decorators': [] + }] + } + ] + } + + builder = APIReferenceBuilder(code_analysis) + generated = builder.build_reference(self.output_dir) + + # Only valid.py should be generated + self.assertEqual(len(generated), 1) + self.assertIn('valid.py', list(generated.keys())[0]) + + +if __name__ == '__main__': + # Run tests with verbose output + unittest.main(verbosity=2)