Add unified scraping tests and example conflict data

- Move test_unified.py to tests/ directory (606 lines, 18 tests)
- Move conflicts.json to tests/fixtures/example_conflicts.json
- Tests cover config validation, conflict detection, merging, and skill building
- Example conflicts show docs/code mismatch scenarios for v2.0.0 feature

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
yusyus
2025-10-29 21:33:33 +03:00
parent e28aaa1a5e
commit 500576a707
2 changed files with 748 additions and 0 deletions

142
tests/fixtures/example_conflicts.json vendored Normal file
View File

@@ -0,0 +1,142 @@
{
"conflicts": [
{
"type": "missing_in_docs",
"severity": "medium",
"api_name": "Node2D",
"docs_info": null,
"code_info": {
"name": "Node2D",
"type": "class",
"source": "scene/node2d.py",
"line": 10,
"base_classes": [
"Node"
],
"docstring": "Base class for 2D nodes"
},
"difference": "API exists in code (scene/node2d.py) but not found in documentation",
"suggestion": "Add documentation for this API"
},
{
"type": "missing_in_docs",
"severity": "medium",
"api_name": "Node2D.move_local_x",
"docs_info": null,
"code_info": {
"name": "Node2D.move_local_x",
"type": "method",
"parameters": [
{
"name": "self",
"type_hint": null,
"default": null
},
{
"name": "delta",
"type_hint": "float",
"default": null
},
{
"name": "snap",
"type_hint": "bool",
"default": "False"
}
],
"return_type": "None",
"source": "scene/node2d.py",
"line": 45,
"docstring": "Move node along local X axis",
"is_async": false
},
"difference": "API exists in code (scene/node2d.py) but not found in documentation",
"suggestion": "Add documentation for this API"
},
{
"type": "missing_in_docs",
"severity": "medium",
"api_name": "Node2D.tween_position",
"docs_info": null,
"code_info": {
"name": "Node2D.tween_position",
"type": "method",
"parameters": [
{
"name": "self",
"type_hint": null,
"default": null
},
{
"name": "target",
"type_hint": "tuple",
"default": null
}
],
"return_type": "None",
"source": "scene/node2d.py",
"line": 52,
"docstring": "Animate to target position",
"is_async": true
},
"difference": "API exists in code (scene/node2d.py) but not found in documentation",
"suggestion": "Add documentation for this API"
},
{
"type": "missing_in_code",
"severity": "high",
"api_name": "move_local_x",
"docs_info": {
"name": "move_local_x",
"parameters": [
{
"name": "delta",
"type": "float",
"default": null
}
],
"return_type": "def",
"source": "https://example.com/api/node2d",
"raw_signature": "def move_local_x(delta: float)"
},
"code_info": null,
"difference": "API documented (https://example.com/api/node2d) but not found in code",
"suggestion": "Update documentation to remove this API, or add it to codebase"
},
{
"type": "missing_in_code",
"severity": "high",
"api_name": "rotate",
"docs_info": {
"name": "rotate",
"parameters": [
{
"name": "angle",
"type": "float",
"default": null
}
],
"return_type": "def",
"source": "https://example.com/api/node2d",
"raw_signature": "def rotate(angle: float)"
},
"code_info": null,
"difference": "API documented (https://example.com/api/node2d) but not found in code",
"suggestion": "Update documentation to remove this API, or add it to codebase"
}
],
"summary": {
"total": 5,
"by_type": {
"missing_in_docs": 3,
"missing_in_code": 2,
"signature_mismatch": 0,
"description_mismatch": 0
},
"by_severity": {
"low": 0,
"medium": 3,
"high": 2
},
"apis_affected": 5
}
}

606
tests/test_unified.py Normal file
View File

@@ -0,0 +1,606 @@
#!/usr/bin/env python3
"""
Tests for Unified Multi-Source Scraper
Covers:
- Config validation (unified vs legacy)
- Conflict detection
- Rule-based merging
- Skill building
"""
import os
import sys
import json
import pytest
import tempfile
from pathlib import Path
# Add CLI to path
# NOTE(review): this inserts the directory that contains this test file, not a
# separate CLI directory. The imports below only resolve if config_validator,
# conflict_detector, merge_sources and unified_skill_builder are importable
# from that directory (or are already on sys.path) -- confirm this still holds
# for the location this file currently lives in.
sys.path.insert(0, str(Path(__file__).parent))
from config_validator import ConfigValidator, validate_config
from conflict_detector import ConflictDetector, Conflict
from merge_sources import RuleBasedMerger
from unified_skill_builder import UnifiedSkillBuilder
# ===========================
# Config Validation Tests
# ===========================
def test_detect_unified_format():
    """Check that ConfigValidator tells unified configs apart from legacy ones.

    Each config is written to a temporary JSON file, the file path is handed
    to ``ConfigValidator``, and the resulting ``is_unified`` flag is compared
    with the expected value. The temporary file is removed afterwards even if
    the assertion fails.
    """
    unified_config = {
        "name": "test",
        "description": "Test skill",
        "sources": [
            {"type": "documentation", "base_url": "https://example.com"}
        ]
    }
    legacy_config = {
        "name": "test",
        "description": "Test skill",
        "base_url": "https://example.com"
    }

    # (config, expected is_unified) pairs: unified detection first, then legacy.
    cases = [
        (unified_config, True),
        (legacy_config, False),
    ]

    for config, expected in cases:
        # delete=False keeps the file on disk after the `with` block closes
        # it, so the path can be passed to ConfigValidator below.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
            json.dump(config, f)
            config_path = f.name
        try:
            validator = ConfigValidator(config_path)
            assert validator.is_unified == expected
        finally:
            os.unlink(config_path)
def test_validate_unified_sources():
    """Validate a unified config listing documentation, github and pdf sources."""
    sources = [
        {"type": "documentation", "base_url": "https://example.com"},
        {"type": "github", "repo": "user/repo"},
        {"type": "pdf", "path": "/path/to.pdf"},
    ]
    config = {"name": "test", "description": "Test", "sources": sources}

    validator = ConfigValidator(config)
    validator.validate()

    # All three sources must still be present on the validator's config.
    validated_sources = validator.config['sources']
    assert len(validated_sources) == 3
def test_validate_invalid_source_type():
    """validate() must raise ValueError for a source with an unknown type."""
    bad_source = {"type": "invalid_type", "url": "https://example.com"}
    validator = ConfigValidator(
        {"name": "test", "description": "Test", "sources": [bad_source]}
    )

    with pytest.raises(ValueError, match="Invalid source type"):
        validator.validate()
def test_needs_api_merge():
    """needs_api_merge() is True for docs + GitHub code and False for docs alone."""
    docs_with_api = {
        "type": "documentation",
        "base_url": "https://example.com",
        "extract_api": True,
    }
    github_with_code = {"type": "github", "repo": "user/repo", "include_code": True}
    docs_plain = {"type": "documentation", "base_url": "https://example.com"}

    # Documentation with API extraction plus GitHub with code included.
    merge_validator = ConfigValidator(
        {
            "name": "test",
            "description": "Test",
            "sources": [docs_with_api, github_with_code],
        }
    )
    assert merge_validator.needs_api_merge() == True

    # A single documentation source.
    docs_only_validator = ConfigValidator(
        {"name": "test", "description": "Test", "sources": [docs_plain]}
    )
    assert docs_only_validator.needs_api_merge() == False
def test_backward_compatibility():
    """A legacy config converts into a unified config with one documentation source."""
    legacy_config = dict(
        name="test",
        description="Test skill",
        base_url="https://example.com",
        selectors={"main_content": "article"},
        max_pages=100,
    )

    unified = ConfigValidator(legacy_config).convert_legacy_to_unified()

    assert 'sources' in unified
    sources = unified['sources']
    assert len(sources) == 1

    # The single converted source carries the legacy base_url.
    only_source = sources[0]
    assert only_source['type'] == 'documentation'
    assert only_source['base_url'] == 'https://example.com'
# ===========================
# Conflict Detection Tests
# ===========================
def test_detect_missing_in_docs():
    """A function found only in the code analysis is reported as missing_in_docs."""
    documented = {
        'name': 'documented_func',
        'parameters': [{'name': 'x', 'type': 'int'}],
        'return_type': 'str',
    }
    undocumented = {
        'name': 'undocumented_func',
        'parameters': [{'name': 'y', 'type_hint': 'float'}],
        'return_type': 'bool',
    }
    docs_data = {
        'pages': [{'url': 'https://example.com/api', 'apis': [documented]}]
    }
    github_data = {
        'code_analysis': {'analyzed_files': [{'functions': [undocumented]}]}
    }

    found = ConflictDetector(docs_data, github_data)._find_missing_in_docs()

    assert len(found) >= 1
    assert 'missing_in_docs' in [conflict.type for conflict in found]
    assert 'undocumented_func' in [conflict.api_name for conflict in found]
def test_detect_missing_in_code():
    """A documented API with no analyzed code is reported as missing_in_code."""
    obsolete = {
        'name': 'obsolete_func',
        'parameters': [{'name': 'x', 'type': 'int'}],
        'return_type': 'str',
    }
    docs_data = {
        'pages': [{'url': 'https://example.com/api', 'apis': [obsolete]}]
    }
    # No analyzed files at all on the code side.
    github_data = {'code_analysis': {'analyzed_files': []}}

    found = ConflictDetector(docs_data, github_data)._find_missing_in_code()

    assert len(found) >= 1
    assert 'missing_in_code' in [conflict.type for conflict in found]
    assert 'obsolete_func' in [conflict.api_name for conflict in found]
def test_detect_signature_mismatch():
    """Same API name with differing parameter lists yields a signature_mismatch."""
    # Documentation lists a single parameter ...
    documented = {
        'name': 'func',
        'parameters': [{'name': 'x', 'type': 'int'}],
        'return_type': 'str',
    }
    # ... while the code side has an additional defaulted parameter.
    implemented = {
        'name': 'func',
        'parameters': [
            {'name': 'x', 'type_hint': 'int'},
            {'name': 'y', 'type_hint': 'bool', 'default': 'False'},
        ],
        'return_type': 'str',
    }
    docs_data = {
        'pages': [{'url': 'https://example.com/api', 'apis': [documented]}]
    }
    github_data = {
        'code_analysis': {'analyzed_files': [{'functions': [implemented]}]}
    }

    found = ConflictDetector(docs_data, github_data)._find_signature_mismatches()

    assert len(found) >= 1
    assert 'signature_mismatch' in [conflict.type for conflict in found]
    assert 'func' in [conflict.api_name for conflict in found]
def test_conflict_severity():
    """A Conflict exposes the severity it was constructed with."""
    # (type, severity, docs_info, code_info, difference)
    cases = [
        # High severity: missing_in_code
        ('missing_in_code', 'high', {'name': 'test'}, None,
         'API documented but not in code'),
        # Medium severity: missing_in_docs
        ('missing_in_docs', 'medium', None, {'name': 'test'},
         'API in code but not documented'),
    ]

    for kind, level, docs_info, code_info, difference in cases:
        conflict = Conflict(
            type=kind,
            severity=level,
            api_name='test',
            docs_info=docs_info,
            code_info=code_info,
            difference=difference,
        )
        assert conflict.severity == level
# ===========================
# Merge Tests
# ===========================
def test_rule_based_merge_docs_only():
    """An API present only in the docs is merged with status 'docs_only'."""
    documented = {
        'name': 'docs_only_api',
        'parameters': [{'name': 'x', 'type': 'int'}],
        'return_type': 'str',
    }
    docs_data = {
        'pages': [{'url': 'https://example.com/api', 'apis': [documented]}]
    }
    github_data = {'code_analysis': {'analyzed_files': []}}

    conflicts = ConflictDetector(docs_data, github_data).detect_all_conflicts()
    merged = RuleBasedMerger(docs_data, github_data, conflicts).merge_all()

    assert 'apis' in merged
    merged_apis = merged['apis']
    assert 'docs_only_api' in merged_apis
    assert merged_apis['docs_only_api']['status'] == 'docs_only'
def test_rule_based_merge_code_only():
    """An API present only in the code is merged with status 'code_only'."""
    implemented = {
        'name': 'code_only_api',
        'parameters': [{'name': 'y', 'type_hint': 'float'}],
        'return_type': 'bool',
    }
    docs_data = {'pages': []}
    github_data = {
        'code_analysis': {'analyzed_files': [{'functions': [implemented]}]}
    }

    conflicts = ConflictDetector(docs_data, github_data).detect_all_conflicts()
    merged = RuleBasedMerger(docs_data, github_data, conflicts).merge_all()

    assert 'apis' in merged
    merged_apis = merged['apis']
    assert 'code_only_api' in merged_apis
    assert merged_apis['code_only_api']['status'] == 'code_only'
def test_rule_based_merge_matched():
    """An API described identically in docs and code is merged as 'matched'."""
    documented = {
        'name': 'matched_api',
        'parameters': [{'name': 'x', 'type': 'int'}],
        'return_type': 'str',
    }
    implemented = {
        'name': 'matched_api',
        'parameters': [{'name': 'x', 'type_hint': 'int'}],
        'return_type': 'str',
    }
    docs_data = {
        'pages': [{'url': 'https://example.com/api', 'apis': [documented]}]
    }
    github_data = {
        'code_analysis': {'analyzed_files': [{'functions': [implemented]}]}
    }

    conflicts = ConflictDetector(docs_data, github_data).detect_all_conflicts()
    merged = RuleBasedMerger(docs_data, github_data, conflicts).merge_all()

    assert 'apis' in merged
    merged_apis = merged['apis']
    assert 'matched_api' in merged_apis
    assert merged_apis['matched_api']['status'] == 'matched'
def test_merge_summary():
    """The merge summary counts two docs-only APIs and one code-only API."""
    documented_apis = [
        {'name': 'api1', 'parameters': [], 'return_type': 'str'},
        {'name': 'api2', 'parameters': [], 'return_type': 'int'},
    ]
    code_functions = [
        {'name': 'api3', 'parameters': [], 'return_type': 'bool'},
    ]
    docs_data = {
        'pages': [{'url': 'https://example.com/api', 'apis': documented_apis}]
    }
    github_data = {
        'code_analysis': {'analyzed_files': [{'functions': code_functions}]}
    }

    conflicts = ConflictDetector(docs_data, github_data).detect_all_conflicts()
    merged = RuleBasedMerger(docs_data, github_data, conflicts).merge_all()

    assert 'summary' in merged
    summary = merged['summary']
    assert summary['total_apis'] == 3
    assert summary['docs_only'] == 2
    assert summary['code_only'] == 1
# ===========================
# Skill Builder Tests
# ===========================
def test_skill_builder_basic():
    """_generate_skill_md() writes a SKILL.md containing the name and description."""
    config = {
        'name': 'test_skill',
        'description': 'Test skill description',
        'sources': [
            {'type': 'documentation', 'base_url': 'https://example.com'},
        ],
    }
    scraped_data = {
        'documentation': {'pages': [], 'data_file': '/tmp/test.json'},
    }

    with tempfile.TemporaryDirectory() as out_dir:
        builder = UnifiedSkillBuilder(config, scraped_data)
        # Redirect output into the temporary directory.
        builder.skill_dir = out_dir
        builder._generate_skill_md()

        # SKILL.md must exist and mention both the name and the description.
        skill_md = Path(out_dir, 'SKILL.md')
        assert skill_md.exists()

        text = skill_md.read_text()
        assert 'test_skill' in text.lower()
        assert 'Test skill description' in text
def test_skill_builder_with_conflicts():
    """SKILL.md generated with one conflict reports its count and type."""
    config = {
        'name': 'test_skill',
        'description': 'Test',
        'sources': [
            {'type': 'documentation', 'base_url': 'https://example.com'},
            {'type': 'github', 'repo': 'user/repo'},
        ],
    }
    single_conflict = Conflict(
        type='missing_in_code',
        severity='high',
        api_name='test_api',
        docs_info={'name': 'test_api'},
        code_info=None,
        difference='Test difference',
    )

    with tempfile.TemporaryDirectory() as out_dir:
        builder = UnifiedSkillBuilder(config, {}, conflicts=[single_conflict])
        builder.skill_dir = out_dir
        builder._generate_skill_md()

        text = (Path(out_dir) / 'SKILL.md').read_text()
        assert '1 conflicts detected' in text
        assert 'missing_in_code' in text
def test_skill_builder_merged_apis():
    """_format_merged_apis() output includes the verified heading and the API name."""
    api_entry = {
        'name': 'test_api',
        'status': 'matched',
        'merged_signature': 'test_api(x: int) -> str',
        'merged_description': 'Test API',
        'source': 'both',
    }
    merged_data = {'apis': {'test_api': api_entry}}
    config = {'name': 'test', 'description': 'Test', 'sources': []}

    with tempfile.TemporaryDirectory() as out_dir:
        builder = UnifiedSkillBuilder(config, {}, merged_data=merged_data)
        builder.skill_dir = out_dir

        rendered = builder._format_merged_apis()
        assert '✅ Verified APIs' in rendered
        assert 'test_api' in rendered
# ===========================
# Integration Tests
# ===========================
def test_full_workflow_unified_config():
    """A docs + GitHub unified config validates, is unified, and needs an API merge."""
    docs_source = {
        "type": "documentation",
        "base_url": "https://example.com",
        "extract_api": True,
    }
    github_source = {
        "type": "github",
        "repo": "user/repo",
        "include_code": True,
        "code_analysis_depth": "surface",
    }
    config = {
        "name": "test_unified",
        "description": "Test unified workflow",
        "merge_mode": "rule-based",
        "sources": [docs_source, github_source],
    }

    # Validation must succeed before the flags are inspected.
    validator = ConfigValidator(config)
    validator.validate()

    assert validator.is_unified == True
    assert validator.needs_api_merge() == True
def test_config_file_validation():
    """validate_config() accepts a path to a unified config file on disk."""
    config = {
        "name": "test",
        "description": "Test",
        "sources": [
            {"type": "documentation", "base_url": "https://example.com"},
        ],
    }

    # Write the config to a temp file that outlives the `with` block.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as handle:
        json.dump(config, handle)
        config_path = handle.name

    try:
        validator = validate_config(config_path)
        assert validator.is_unified == True
    finally:
        # Always remove the temp file, even if validation or the assert fails.
        os.unlink(config_path)
# Run tests
if __name__ == '__main__':
    # Propagate pytest's exit status so that running this file directly
    # exits non-zero when any test fails.
    sys.exit(pytest.main([__file__, '-v']))