""" Tests for Phase 3: Enhanced Source Merging with GitHub Streams Tests the multi-layer merging architecture: - Layer 1: C3.x code (ground truth) - Layer 2: HTML docs (official intent) - Layer 3: GitHub docs (README/CONTRIBUTING) - Layer 4: GitHub insights (issues) """ from skill_seekers.cli.conflict_detector import Conflict from skill_seekers.cli.github_fetcher import CodeStream, DocsStream, InsightsStream, ThreeStreamData from skill_seekers.cli.merge_sources import ( RuleBasedMerger, _match_issues_to_apis, categorize_issues_by_topic, generate_hybrid_content, ) class TestIssueCategorization: """Test issue categorization by topic.""" def test_categorize_issues_basic(self): """Test basic issue categorization.""" problems = [ { "title": "OAuth setup fails", "labels": ["bug", "oauth"], "number": 1, "state": "open", "comments": 10, }, { "title": "Testing framework issue", "labels": ["testing"], "number": 2, "state": "open", "comments": 5, }, ] solutions = [ { "title": "Fixed OAuth redirect", "labels": ["oauth"], "number": 3, "state": "closed", "comments": 3, } ] topics = ["oauth", "testing", "async"] categorized = categorize_issues_by_topic(problems, solutions, topics) assert "oauth" in categorized assert len(categorized["oauth"]) == 2 # 1 problem + 1 solution assert "testing" in categorized assert len(categorized["testing"]) == 1 def test_categorize_issues_keyword_matching(self): """Test keyword matching in titles and labels.""" problems = [ { "title": "Database connection timeout", "labels": ["db"], "number": 1, "state": "open", "comments": 7, } ] solutions = [] topics = ["database"] categorized = categorize_issues_by_topic(problems, solutions, topics) # Should match 'database' topic due to 'db' in labels assert "database" in categorized or "other" in categorized def test_categorize_issues_multi_keyword_topic(self): """Test topics with multiple keywords.""" problems = [ { "title": "Async API call fails", "labels": ["async", "api"], "number": 1, "state": "open", "comments": 8, } ] solutions = [] topics = ["async api"] categorized = categorize_issues_by_topic(problems, solutions, topics) # Should match due to both 'async' and 'api' in labels assert "async api" in categorized assert len(categorized["async api"]) == 1 def test_categorize_issues_no_match_goes_to_other(self): """Test that unmatched issues go to 'other' category.""" problems = [ { "title": "Random issue", "labels": ["misc"], "number": 1, "state": "open", "comments": 5, } ] solutions = [] topics = ["oauth", "testing"] categorized = categorize_issues_by_topic(problems, solutions, topics) assert "other" in categorized assert len(categorized["other"]) == 1 def test_categorize_issues_empty_lists(self): """Test categorization with empty input.""" categorized = categorize_issues_by_topic([], [], ["oauth"]) # Should return empty dict (no categories with issues) assert len(categorized) == 0 class TestHybridContent: """Test hybrid content generation.""" def test_generate_hybrid_content_basic(self): """Test basic hybrid content generation.""" api_data = { "apis": {"oauth_login": {"name": "oauth_login", "status": "matched"}}, "summary": {"total_apis": 1}, } github_docs = { "readme": "# Project README", "contributing": None, "docs_files": [{"path": "docs/oauth.md", "content": "OAuth guide"}], } github_insights = { "metadata": { "stars": 1234, "forks": 56, "language": "Python", "description": "Test project", }, "common_problems": [ { "title": "OAuth fails", "number": 42, "state": "open", "comments": 10, "labels": ["bug"], } ], "known_solutions": [ { "title": "Fixed OAuth", "number": 35, "state": "closed", "comments": 5, "labels": ["bug"], } ], "top_labels": [{"label": "bug", "count": 10}, {"label": "enhancement", "count": 5}], } conflicts = [] hybrid = generate_hybrid_content(api_data, github_docs, github_insights, conflicts) # Check structure assert "api_reference" in hybrid assert "github_context" in hybrid assert "conflict_summary" in hybrid assert "issue_links" in hybrid # Check GitHub docs layer assert hybrid["github_context"]["docs"]["readme"] == "# Project README" assert hybrid["github_context"]["docs"]["docs_files_count"] == 1 # Check GitHub insights layer assert hybrid["github_context"]["metadata"]["stars"] == 1234 assert hybrid["github_context"]["metadata"]["language"] == "Python" assert hybrid["github_context"]["issues"]["common_problems_count"] == 1 assert hybrid["github_context"]["issues"]["known_solutions_count"] == 1 assert len(hybrid["github_context"]["issues"]["top_problems"]) == 1 assert len(hybrid["github_context"]["top_labels"]) == 2 def test_generate_hybrid_content_with_conflicts(self): """Test hybrid content with conflicts.""" api_data = {"apis": {}, "summary": {}} github_docs = None github_insights = None conflicts = [ Conflict( api_name="test_api", type="signature_mismatch", severity="medium", difference="Parameter count differs", docs_info={"parameters": ["a", "b"]}, code_info={"parameters": ["a", "b", "c"]}, ), Conflict( api_name="test_api_2", type="missing_in_docs", severity="low", difference="API not documented", docs_info=None, code_info={"name": "test_api_2"}, ), ] hybrid = generate_hybrid_content(api_data, github_docs, github_insights, conflicts) # Check conflict summary assert hybrid["conflict_summary"]["total_conflicts"] == 2 assert hybrid["conflict_summary"]["by_type"]["signature_mismatch"] == 1 assert hybrid["conflict_summary"]["by_type"]["missing_in_docs"] == 1 assert hybrid["conflict_summary"]["by_severity"]["medium"] == 1 assert hybrid["conflict_summary"]["by_severity"]["low"] == 1 def test_generate_hybrid_content_no_github_data(self): """Test hybrid content with no GitHub data.""" api_data = {"apis": {}, "summary": {}} hybrid = generate_hybrid_content(api_data, None, None, []) # Should still have structure, but no GitHub context assert "api_reference" in hybrid assert "github_context" in hybrid assert hybrid["github_context"] == {} assert hybrid["conflict_summary"]["total_conflicts"] == 0 class TestIssueToAPIMatching: """Test matching issues to APIs.""" def test_match_issues_to_apis_basic(self): """Test basic issue to API matching.""" apis = {"oauth_login": {"name": "oauth_login"}, "async_fetch": {"name": "async_fetch"}} problems = [ { "title": "OAuth login fails", "number": 42, "state": "open", "comments": 10, "labels": ["bug", "oauth"], } ] solutions = [ { "title": "Fixed async fetch timeout", "number": 35, "state": "closed", "comments": 5, "labels": ["async"], } ] issue_links = _match_issues_to_apis(apis, problems, solutions) # Should match oauth issue to oauth_login API assert "oauth_login" in issue_links assert len(issue_links["oauth_login"]) == 1 assert issue_links["oauth_login"][0]["number"] == 42 # Should match async issue to async_fetch API assert "async_fetch" in issue_links assert len(issue_links["async_fetch"]) == 1 assert issue_links["async_fetch"][0]["number"] == 35 def test_match_issues_to_apis_no_matches(self): """Test when no issues match any APIs.""" apis = {"database_connect": {"name": "database_connect"}} problems = [ { "title": "Random unrelated issue", "number": 1, "state": "open", "comments": 5, "labels": ["misc"], } ] issue_links = _match_issues_to_apis(apis, problems, []) # Should be empty - no matches assert len(issue_links) == 0 def test_match_issues_to_apis_dotted_names(self): """Test matching with dotted API names.""" apis = {"module.oauth.login": {"name": "module.oauth.login"}} problems = [ { "title": "OAuth module fails", "number": 42, "state": "open", "comments": 10, "labels": ["oauth"], } ] issue_links = _match_issues_to_apis(apis, problems, []) # Should match due to 'oauth' keyword assert "module.oauth.login" in issue_links assert len(issue_links["module.oauth.login"]) == 1 class TestRuleBasedMergerWithGitHubStreams: """Test RuleBasedMerger with GitHub streams.""" def test_merger_with_github_streams(self, tmp_path): """Test merger with three-stream GitHub data.""" docs_data = {"pages": []} github_data = {"apis": {}} conflicts = [] # Create three-stream data code_stream = CodeStream(directory=tmp_path, files=[]) docs_stream = DocsStream( readme="# README", contributing="# Contributing", docs_files=[{"path": "docs/guide.md", "content": "Guide content"}], ) insights_stream = InsightsStream( metadata={"stars": 1234, "forks": 56, "language": "Python"}, common_problems=[ {"title": "Bug 1", "number": 1, "state": "open", "comments": 10, "labels": ["bug"]} ], known_solutions=[ {"title": "Fix 1", "number": 2, "state": "closed", "comments": 5, "labels": ["bug"]} ], top_labels=[{"label": "bug", "count": 10}], ) github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) # Create merger with streams merger = RuleBasedMerger(docs_data, github_data, conflicts, github_streams) assert merger.github_streams is not None assert merger.github_docs is not None assert merger.github_insights is not None assert merger.github_docs["readme"] == "# README" assert merger.github_insights["metadata"]["stars"] == 1234 def test_merger_merge_all_with_streams(self, tmp_path): """Test merge_all() with GitHub streams.""" docs_data = {"pages": []} github_data = {"apis": {}} conflicts = [] # Create three-stream data code_stream = CodeStream(directory=tmp_path, files=[]) docs_stream = DocsStream(readme="# README", contributing=None, docs_files=[]) insights_stream = InsightsStream( metadata={"stars": 500}, common_problems=[], known_solutions=[], top_labels=[] ) github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) # Create and run merger merger = RuleBasedMerger(docs_data, github_data, conflicts, github_streams) result = merger.merge_all() # Check result has GitHub context assert "github_context" in result assert "conflict_summary" in result assert "issue_links" in result assert result["github_context"]["metadata"]["stars"] == 500 def test_merger_without_streams_backward_compat(self): """Test backward compatibility without GitHub streams.""" docs_data = {"pages": []} github_data = {"apis": {}} conflicts = [] # Create merger without streams (old API) merger = RuleBasedMerger(docs_data, github_data, conflicts) assert merger.github_streams is None assert merger.github_docs is None assert merger.github_insights is None # Should still work result = merger.merge_all() assert "apis" in result assert "summary" in result # Should not have GitHub context assert "github_context" not in result class TestIntegration: """Integration tests for Phase 3.""" def test_full_pipeline_with_streams(self, tmp_path): """Test complete pipeline with three-stream data.""" # Create minimal test data docs_data = {"pages": []} github_data = {"apis": {}} # Create three-stream data code_stream = CodeStream(directory=tmp_path, files=[]) docs_stream = DocsStream( readme="# Test Project\n\nA test project.", contributing="# Contributing\n\nPull requests welcome.", docs_files=[ {"path": "docs/quickstart.md", "content": "# Quick Start"}, {"path": "docs/api.md", "content": "# API Reference"}, ], ) insights_stream = InsightsStream( metadata={ "stars": 2500, "forks": 123, "language": "Python", "description": "Test framework", }, common_problems=[ { "title": "Installation fails on Windows", "number": 150, "state": "open", "comments": 25, "labels": ["bug", "windows"], }, { "title": "Memory leak in async mode", "number": 142, "state": "open", "comments": 18, "labels": ["bug", "async"], }, ], known_solutions=[ { "title": "Fixed config loading", "number": 130, "state": "closed", "comments": 8, "labels": ["bug"], }, { "title": "Resolved OAuth timeout", "number": 125, "state": "closed", "comments": 12, "labels": ["oauth"], }, ], top_labels=[ {"label": "bug", "count": 45}, {"label": "enhancement", "count": 20}, {"label": "question", "count": 15}, ], ) github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) # Create merger and merge merger = RuleBasedMerger(docs_data, github_data, [], github_streams) result = merger.merge_all() # Verify all layers present assert "apis" in result # Layer 1 & 2: Code + Docs assert "github_context" in result # Layer 3 & 4: GitHub docs + insights # Verify Layer 3: GitHub docs gh_context = result["github_context"] assert gh_context["docs"]["readme"] == "# Test Project\n\nA test project." assert gh_context["docs"]["contributing"] == "# Contributing\n\nPull requests welcome." assert gh_context["docs"]["docs_files_count"] == 2 # Verify Layer 4: GitHub insights assert gh_context["metadata"]["stars"] == 2500 assert gh_context["metadata"]["language"] == "Python" assert gh_context["issues"]["common_problems_count"] == 2 assert gh_context["issues"]["known_solutions_count"] == 2 assert len(gh_context["issues"]["top_problems"]) == 2 assert len(gh_context["issues"]["top_solutions"]) == 2 assert len(gh_context["top_labels"]) == 3 # Verify conflict summary assert "conflict_summary" in result assert result["conflict_summary"]["total_conflicts"] == 0