Fixed all remaining linting errors from the 310 total: - SIM102: Combined nested if statements (31 errors) - adaptors/openai.py - config_extractor.py - codebase_scraper.py - doc_scraper.py - github_fetcher.py - pattern_recognizer.py - pdf_scraper.py - test_example_extractor.py - SIM117: Combined multiple with statements (24 errors) - tests/test_async_scraping.py (2 errors) - tests/test_github_scraper.py (2 errors) - tests/test_guide_enhancer.py (20 errors) - Fixed test fixture parameter (mock_config in test_c3_integration.py) All 700+ tests passing. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
180 lines
5.6 KiB
Python
180 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Simple Integration Tests for Unified Multi-Source Scraper
|
|
|
|
Focuses on real-world usage patterns rather than unit tests.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
# Add CLI to path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from .config_validator import validate_config
|
|
|
|
|
|
def test_validate_existing_unified_configs():
    """Validate each known unified config that is present on disk.

    Looks in the ``configs`` directory two levels above this file. A config
    that does not exist there is skipped, so the test passes without
    asserting anything when none of the listed files are present.
    """
    configs_root = Path(__file__).parent.parent / "configs"
    candidate_names = (
        "godot_unified.json",
        "react_unified.json",
        "django_unified.json",
        "fastapi_unified.json",
    )

    for name in candidate_names:
        candidate = configs_root / name
        # Missing files are tolerated rather than treated as failures.
        if not candidate.exists():
            continue

        print(f"\n✓ Validating {name}...")
        result = validate_config(str(candidate))
        assert result.is_unified, f"{name} should be unified format"
        assert result.needs_api_merge(), f"{name} should need API merging"
        print(f" Sources: {len(result.config['sources'])}")
        print(f" Merge mode: {result.config.get('merge_mode')}")
|
|
|
|
|
|
def test_backward_compatibility():
    """Confirm that legacy configs are still accepted and not seen as unified.

    Only configs that exist in the ``configs`` directory two levels above
    this file are checked; absent ones are skipped.
    """
    configs_root = Path(__file__).parent.parent / "configs"

    for name in ("react.json", "godot.json", "django.json"):
        candidate = configs_root / name
        # Missing files are tolerated rather than treated as failures.
        if not candidate.exists():
            continue

        print(f"\n✓ Validating legacy {name}...")
        result = validate_config(str(candidate))
        assert not result.is_unified, f"{name} should be legacy format"
        print(" Format: Legacy")
|
|
|
|
|
|
def test_create_temp_unified_config():
    """Write a two-source unified config to a temp file and validate it.

    The config has one documentation source and one GitHub source. The
    temporary file is removed afterwards whether or not validation succeeds.
    """
    docs_source = {
        "type": "documentation",
        "base_url": "https://example.com/docs",
        "extract_api": True,
        "max_pages": 50,
    }
    github_source = {
        "type": "github",
        "repo": "test/repo",
        "include_code": True,
        "code_analysis_depth": "surface",
    }
    payload = {
        "name": "test_unified",
        "description": "Test unified config",
        "merge_mode": "rule-based",
        "sources": [docs_source, github_source],
    }

    # delete=False keeps the file on disk after the handle closes so the
    # validator can open it by path; cleanup happens in the finally below.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as handle:
        json.dump(payload, handle)
        temp_path = handle.name

    try:
        print("\n✓ Validating temp unified config...")
        result = validate_config(temp_path)
        assert result.is_unified
        assert result.needs_api_merge()
        sources = result.config["sources"]
        assert len(sources) == 2
        print(" ✓ Config is valid unified format")
        print(f" Sources: {len(sources)}")
    finally:
        os.remove(temp_path)
|
|
|
|
|
|
def test_mixed_source_types():
    """Validate a config that mixes documentation, GitHub and PDF sources.

    The config is written to a temporary JSON file, validated, and the file
    is removed afterwards whether or not validation succeeds.
    """
    payload = {
        "name": "test_mixed",
        "description": "Test mixed sources",
        "merge_mode": "rule-based",
        "sources": [
            {"type": "documentation", "base_url": "https://example.com"},
            {"type": "github", "repo": "test/repo"},
            {"type": "pdf", "path": "/path/to/manual.pdf"},
        ],
    }

    # delete=False keeps the file on disk after the handle closes so the
    # validator can open it by path; cleanup happens in the finally below.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as handle:
        json.dump(payload, handle)
        temp_path = handle.name

    try:
        print("\n✓ Validating mixed source types...")
        result = validate_config(temp_path)
        assert result.is_unified
        assert len(result.config["sources"]) == 3

        # Every declared source type must be present in the validated config.
        found_types = [source["type"] for source in result.config["sources"]]
        for expected_type in ("documentation", "github", "pdf"):
            assert expected_type in found_types
        print(" ✓ All 3 source types validated")
    finally:
        os.remove(temp_path)
|
|
|
|
|
|
def test_config_validation_errors():
    """Check that a config with an unknown source type is rejected.

    ``validate_config`` is expected to raise ``ValueError`` whose message
    contains "Invalid" or "invalid". The test fails if no error is raised.
    The temporary config file is removed afterwards in every case.
    """
    # A source whose type is not one the validator should accept.
    payload = {
        "name": "test",
        "description": "Test",
        "sources": [{"type": "invalid_type", "url": "https://example.com"}],
    }

    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as handle:
        json.dump(payload, handle)
        temp_path = handle.name

    try:
        print("\n✓ Testing invalid source type...")
        try:
            # validate_config() calls .validate() automatically
            validate_config(temp_path)
        except ValueError as exc:
            message = str(exc)
            assert "Invalid" in message or "invalid" in message
            print(" ✓ Invalid source type correctly rejected")
        else:
            raise AssertionError("Should have raised error for invalid source type")
    finally:
        os.remove(temp_path)
|
|
|
|
|
|
# Run tests
|
|
if __name__ == "__main__":
    # Script entry point: run every test in order and exit with status 1 on
    # the first assertion failure or unexpected exception.
    banner = "=" * 60
    print(banner)
    print("Running Unified Scraper Integration Tests")
    print(banner)

    try:
        ordered_tests = (
            test_validate_existing_unified_configs,
            test_backward_compatibility,
            test_create_temp_unified_config,
            test_mixed_source_types,
            test_config_validation_errors,
        )
        for run_test in ordered_tests:
            run_test()

        print("\n" + banner)
        print("✅ All integration tests passed!")
        print(banner)
    except AssertionError as failure:
        print(f"\n❌ Test failed: {failure}")
        sys.exit(1)
    except Exception as error:
        print(f"\n❌ Unexpected error: {error}")
        # Imported lazily: only needed on the unexpected-error path.
        import traceback

        traceback.print_exc()
        sys.exit(1)
|