This commit is contained in:
Pablo Estevez
2026-01-17 17:29:21 +00:00
parent c89f059712
commit 5ed767ff9a
144 changed files with 14142 additions and 16488 deletions

View File

@@ -8,22 +8,14 @@ Tests the multi-layer merging architecture:
- Layer 4: GitHub insights (issues)
"""
import pytest
from pathlib import Path
from unittest.mock import Mock
from skill_seekers.cli.conflict_detector import Conflict
from skill_seekers.cli.github_fetcher import CodeStream, DocsStream, InsightsStream, ThreeStreamData
from skill_seekers.cli.merge_sources import (
RuleBasedMerger,
_match_issues_to_apis,
categorize_issues_by_topic,
generate_hybrid_content,
RuleBasedMerger,
_match_issues_to_apis
)
from skill_seekers.cli.github_fetcher import (
CodeStream,
DocsStream,
InsightsStream,
ThreeStreamData
)
from skill_seekers.cli.conflict_detector import Conflict
class TestIssueCategorization:
@@ -32,68 +24,66 @@ class TestIssueCategorization:
def test_categorize_issues_basic(self):
"""Test basic issue categorization."""
problems = [
{'title': 'OAuth setup fails', 'labels': ['bug', 'oauth'], 'number': 1, 'state': 'open', 'comments': 10},
{'title': 'Testing framework issue', 'labels': ['testing'], 'number': 2, 'state': 'open', 'comments': 5}
{"title": "OAuth setup fails", "labels": ["bug", "oauth"], "number": 1, "state": "open", "comments": 10},
{"title": "Testing framework issue", "labels": ["testing"], "number": 2, "state": "open", "comments": 5},
]
solutions = [
{'title': 'Fixed OAuth redirect', 'labels': ['oauth'], 'number': 3, 'state': 'closed', 'comments': 3}
{"title": "Fixed OAuth redirect", "labels": ["oauth"], "number": 3, "state": "closed", "comments": 3}
]
topics = ['oauth', 'testing', 'async']
topics = ["oauth", "testing", "async"]
categorized = categorize_issues_by_topic(problems, solutions, topics)
assert 'oauth' in categorized
assert len(categorized['oauth']) == 2 # 1 problem + 1 solution
assert 'testing' in categorized
assert len(categorized['testing']) == 1
assert "oauth" in categorized
assert len(categorized["oauth"]) == 2 # 1 problem + 1 solution
assert "testing" in categorized
assert len(categorized["testing"]) == 1
def test_categorize_issues_keyword_matching(self):
"""Test keyword matching in titles and labels."""
problems = [
{'title': 'Database connection timeout', 'labels': ['db'], 'number': 1, 'state': 'open', 'comments': 7}
{"title": "Database connection timeout", "labels": ["db"], "number": 1, "state": "open", "comments": 7}
]
solutions = []
topics = ['database']
topics = ["database"]
categorized = categorize_issues_by_topic(problems, solutions, topics)
# Should match 'database' topic due to 'db' in labels
assert 'database' in categorized or 'other' in categorized
assert "database" in categorized or "other" in categorized
def test_categorize_issues_multi_keyword_topic(self):
"""Test topics with multiple keywords."""
problems = [
{'title': 'Async API call fails', 'labels': ['async', 'api'], 'number': 1, 'state': 'open', 'comments': 8}
{"title": "Async API call fails", "labels": ["async", "api"], "number": 1, "state": "open", "comments": 8}
]
solutions = []
topics = ['async api']
topics = ["async api"]
categorized = categorize_issues_by_topic(problems, solutions, topics)
# Should match due to both 'async' and 'api' in labels
assert 'async api' in categorized
assert len(categorized['async api']) == 1
assert "async api" in categorized
assert len(categorized["async api"]) == 1
def test_categorize_issues_no_match_goes_to_other(self):
"""Test that unmatched issues go to 'other' category."""
problems = [
{'title': 'Random issue', 'labels': ['misc'], 'number': 1, 'state': 'open', 'comments': 5}
]
problems = [{"title": "Random issue", "labels": ["misc"], "number": 1, "state": "open", "comments": 5}]
solutions = []
topics = ['oauth', 'testing']
topics = ["oauth", "testing"]
categorized = categorize_issues_by_topic(problems, solutions, topics)
assert 'other' in categorized
assert len(categorized['other']) == 1
assert "other" in categorized
assert len(categorized["other"]) == 1
def test_categorize_issues_empty_lists(self):
"""Test categorization with empty input."""
categorized = categorize_issues_by_topic([], [], ['oauth'])
categorized = categorize_issues_by_topic([], [], ["oauth"])
# Should return empty dict (no categories with issues)
assert len(categorized) == 0
@@ -104,36 +94,23 @@ class TestHybridContent:
def test_generate_hybrid_content_basic(self):
"""Test basic hybrid content generation."""
api_data = {
'apis': {
'oauth_login': {'name': 'oauth_login', 'status': 'matched'}
},
'summary': {'total_apis': 1}
}
api_data = {"apis": {"oauth_login": {"name": "oauth_login", "status": "matched"}}, "summary": {"total_apis": 1}}
github_docs = {
'readme': '# Project README',
'contributing': None,
'docs_files': [{'path': 'docs/oauth.md', 'content': 'OAuth guide'}]
"readme": "# Project README",
"contributing": None,
"docs_files": [{"path": "docs/oauth.md", "content": "OAuth guide"}],
}
github_insights = {
'metadata': {
'stars': 1234,
'forks': 56,
'language': 'Python',
'description': 'Test project'
},
'common_problems': [
{'title': 'OAuth fails', 'number': 42, 'state': 'open', 'comments': 10, 'labels': ['bug']}
"metadata": {"stars": 1234, "forks": 56, "language": "Python", "description": "Test project"},
"common_problems": [
{"title": "OAuth fails", "number": 42, "state": "open", "comments": 10, "labels": ["bug"]}
],
'known_solutions': [
{'title': 'Fixed OAuth', 'number': 35, 'state': 'closed', 'comments': 5, 'labels': ['bug']}
"known_solutions": [
{"title": "Fixed OAuth", "number": 35, "state": "closed", "comments": 5, "labels": ["bug"]}
],
'top_labels': [
{'label': 'bug', 'count': 10},
{'label': 'enhancement', 'count': 5}
]
"top_labels": [{"label": "bug", "count": 10}, {"label": "enhancement", "count": 5}],
}
conflicts = []
@@ -141,68 +118,68 @@ class TestHybridContent:
hybrid = generate_hybrid_content(api_data, github_docs, github_insights, conflicts)
# Check structure
assert 'api_reference' in hybrid
assert 'github_context' in hybrid
assert 'conflict_summary' in hybrid
assert 'issue_links' in hybrid
assert "api_reference" in hybrid
assert "github_context" in hybrid
assert "conflict_summary" in hybrid
assert "issue_links" in hybrid
# Check GitHub docs layer
assert hybrid['github_context']['docs']['readme'] == '# Project README'
assert hybrid['github_context']['docs']['docs_files_count'] == 1
assert hybrid["github_context"]["docs"]["readme"] == "# Project README"
assert hybrid["github_context"]["docs"]["docs_files_count"] == 1
# Check GitHub insights layer
assert hybrid['github_context']['metadata']['stars'] == 1234
assert hybrid['github_context']['metadata']['language'] == 'Python'
assert hybrid['github_context']['issues']['common_problems_count'] == 1
assert hybrid['github_context']['issues']['known_solutions_count'] == 1
assert len(hybrid['github_context']['issues']['top_problems']) == 1
assert len(hybrid['github_context']['top_labels']) == 2
assert hybrid["github_context"]["metadata"]["stars"] == 1234
assert hybrid["github_context"]["metadata"]["language"] == "Python"
assert hybrid["github_context"]["issues"]["common_problems_count"] == 1
assert hybrid["github_context"]["issues"]["known_solutions_count"] == 1
assert len(hybrid["github_context"]["issues"]["top_problems"]) == 1
assert len(hybrid["github_context"]["top_labels"]) == 2
def test_generate_hybrid_content_with_conflicts(self):
"""Test hybrid content with conflicts."""
api_data = {'apis': {}, 'summary': {}}
api_data = {"apis": {}, "summary": {}}
github_docs = None
github_insights = None
conflicts = [
Conflict(
api_name='test_api',
type='signature_mismatch',
severity='medium',
difference='Parameter count differs',
docs_info={'parameters': ['a', 'b']},
code_info={'parameters': ['a', 'b', 'c']}
api_name="test_api",
type="signature_mismatch",
severity="medium",
difference="Parameter count differs",
docs_info={"parameters": ["a", "b"]},
code_info={"parameters": ["a", "b", "c"]},
),
Conflict(
api_name='test_api_2',
type='missing_in_docs',
severity='low',
difference='API not documented',
api_name="test_api_2",
type="missing_in_docs",
severity="low",
difference="API not documented",
docs_info=None,
code_info={'name': 'test_api_2'}
)
code_info={"name": "test_api_2"},
),
]
hybrid = generate_hybrid_content(api_data, github_docs, github_insights, conflicts)
# Check conflict summary
assert hybrid['conflict_summary']['total_conflicts'] == 2
assert hybrid['conflict_summary']['by_type']['signature_mismatch'] == 1
assert hybrid['conflict_summary']['by_type']['missing_in_docs'] == 1
assert hybrid['conflict_summary']['by_severity']['medium'] == 1
assert hybrid['conflict_summary']['by_severity']['low'] == 1
assert hybrid["conflict_summary"]["total_conflicts"] == 2
assert hybrid["conflict_summary"]["by_type"]["signature_mismatch"] == 1
assert hybrid["conflict_summary"]["by_type"]["missing_in_docs"] == 1
assert hybrid["conflict_summary"]["by_severity"]["medium"] == 1
assert hybrid["conflict_summary"]["by_severity"]["low"] == 1
def test_generate_hybrid_content_no_github_data(self):
"""Test hybrid content with no GitHub data."""
api_data = {'apis': {}, 'summary': {}}
api_data = {"apis": {}, "summary": {}}
hybrid = generate_hybrid_content(api_data, None, None, [])
# Should still have structure, but no GitHub context
assert 'api_reference' in hybrid
assert 'github_context' in hybrid
assert hybrid['github_context'] == {}
assert hybrid['conflict_summary']['total_conflicts'] == 0
assert "api_reference" in hybrid
assert "github_context" in hybrid
assert hybrid["github_context"] == {}
assert hybrid["conflict_summary"]["total_conflicts"] == 0
class TestIssueToAPIMatching:
@@ -210,39 +187,34 @@ class TestIssueToAPIMatching:
def test_match_issues_to_apis_basic(self):
"""Test basic issue to API matching."""
apis = {
'oauth_login': {'name': 'oauth_login'},
'async_fetch': {'name': 'async_fetch'}
}
apis = {"oauth_login": {"name": "oauth_login"}, "async_fetch": {"name": "async_fetch"}}
problems = [
{'title': 'OAuth login fails', 'number': 42, 'state': 'open', 'comments': 10, 'labels': ['bug', 'oauth']}
{"title": "OAuth login fails", "number": 42, "state": "open", "comments": 10, "labels": ["bug", "oauth"]}
]
solutions = [
{'title': 'Fixed async fetch timeout', 'number': 35, 'state': 'closed', 'comments': 5, 'labels': ['async']}
{"title": "Fixed async fetch timeout", "number": 35, "state": "closed", "comments": 5, "labels": ["async"]}
]
issue_links = _match_issues_to_apis(apis, problems, solutions)
# Should match oauth issue to oauth_login API
assert 'oauth_login' in issue_links
assert len(issue_links['oauth_login']) == 1
assert issue_links['oauth_login'][0]['number'] == 42
assert "oauth_login" in issue_links
assert len(issue_links["oauth_login"]) == 1
assert issue_links["oauth_login"][0]["number"] == 42
# Should match async issue to async_fetch API
assert 'async_fetch' in issue_links
assert len(issue_links['async_fetch']) == 1
assert issue_links['async_fetch'][0]['number'] == 35
assert "async_fetch" in issue_links
assert len(issue_links["async_fetch"]) == 1
assert issue_links["async_fetch"][0]["number"] == 35
def test_match_issues_to_apis_no_matches(self):
"""Test when no issues match any APIs."""
apis = {
'database_connect': {'name': 'database_connect'}
}
apis = {"database_connect": {"name": "database_connect"}}
problems = [
{'title': 'Random unrelated issue', 'number': 1, 'state': 'open', 'comments': 5, 'labels': ['misc']}
{"title": "Random unrelated issue", "number": 1, "state": "open", "comments": 5, "labels": ["misc"]}
]
issue_links = _match_issues_to_apis(apis, problems, [])
@@ -252,19 +224,15 @@ class TestIssueToAPIMatching:
def test_match_issues_to_apis_dotted_names(self):
"""Test matching with dotted API names."""
apis = {
'module.oauth.login': {'name': 'module.oauth.login'}
}
apis = {"module.oauth.login": {"name": "module.oauth.login"}}
problems = [
{'title': 'OAuth module fails', 'number': 42, 'state': 'open', 'comments': 10, 'labels': ['oauth']}
]
problems = [{"title": "OAuth module fails", "number": 42, "state": "open", "comments": 10, "labels": ["oauth"]}]
issue_links = _match_issues_to_apis(apis, problems, [])
# Should match due to 'oauth' keyword
assert 'module.oauth.login' in issue_links
assert len(issue_links['module.oauth.login']) == 1
assert "module.oauth.login" in issue_links
assert len(issue_links["module.oauth.login"]) == 1
class TestRuleBasedMergerWithGitHubStreams:
@@ -272,26 +240,22 @@ class TestRuleBasedMergerWithGitHubStreams:
def test_merger_with_github_streams(self, tmp_path):
"""Test merger with three-stream GitHub data."""
docs_data = {'pages': []}
github_data = {'apis': {}}
docs_data = {"pages": []}
github_data = {"apis": {}}
conflicts = []
# Create three-stream data
code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(
readme='# README',
contributing='# Contributing',
docs_files=[{'path': 'docs/guide.md', 'content': 'Guide content'}]
readme="# README",
contributing="# Contributing",
docs_files=[{"path": "docs/guide.md", "content": "Guide content"}],
)
insights_stream = InsightsStream(
metadata={'stars': 1234, 'forks': 56, 'language': 'Python'},
common_problems=[
{'title': 'Bug 1', 'number': 1, 'state': 'open', 'comments': 10, 'labels': ['bug']}
],
known_solutions=[
{'title': 'Fix 1', 'number': 2, 'state': 'closed', 'comments': 5, 'labels': ['bug']}
],
top_labels=[{'label': 'bug', 'count': 10}]
metadata={"stars": 1234, "forks": 56, "language": "Python"},
common_problems=[{"title": "Bug 1", "number": 1, "state": "open", "comments": 10, "labels": ["bug"]}],
known_solutions=[{"title": "Fix 1", "number": 2, "state": "closed", "comments": 5, "labels": ["bug"]}],
top_labels=[{"label": "bug", "count": 10}],
)
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
@@ -301,24 +265,19 @@ class TestRuleBasedMergerWithGitHubStreams:
assert merger.github_streams is not None
assert merger.github_docs is not None
assert merger.github_insights is not None
assert merger.github_docs['readme'] == '# README'
assert merger.github_insights['metadata']['stars'] == 1234
assert merger.github_docs["readme"] == "# README"
assert merger.github_insights["metadata"]["stars"] == 1234
def test_merger_merge_all_with_streams(self, tmp_path):
"""Test merge_all() with GitHub streams."""
docs_data = {'pages': []}
github_data = {'apis': {}}
docs_data = {"pages": []}
github_data = {"apis": {}}
conflicts = []
# Create three-stream data
code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme='# README', contributing=None, docs_files=[])
insights_stream = InsightsStream(
metadata={'stars': 500},
common_problems=[],
known_solutions=[],
top_labels=[]
)
docs_stream = DocsStream(readme="# README", contributing=None, docs_files=[])
insights_stream = InsightsStream(metadata={"stars": 500}, common_problems=[], known_solutions=[], top_labels=[])
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Create and run merger
@@ -326,15 +285,15 @@ class TestRuleBasedMergerWithGitHubStreams:
result = merger.merge_all()
# Check result has GitHub context
assert 'github_context' in result
assert 'conflict_summary' in result
assert 'issue_links' in result
assert result['github_context']['metadata']['stars'] == 500
assert "github_context" in result
assert "conflict_summary" in result
assert "issue_links" in result
assert result["github_context"]["metadata"]["stars"] == 500
def test_merger_without_streams_backward_compat(self):
"""Test backward compatibility without GitHub streams."""
docs_data = {'pages': []}
github_data = {'apis': {}}
docs_data = {"pages": []}
github_data = {"apis": {}}
conflicts = []
# Create merger without streams (old API)
@@ -346,10 +305,10 @@ class TestRuleBasedMergerWithGitHubStreams:
# Should still work
result = merger.merge_all()
assert 'apis' in result
assert 'summary' in result
assert "apis" in result
assert "summary" in result
# Should not have GitHub context
assert 'github_context' not in result
assert "github_context" not in result
class TestIntegration:
@@ -358,39 +317,52 @@ class TestIntegration:
def test_full_pipeline_with_streams(self, tmp_path):
"""Test complete pipeline with three-stream data."""
# Create minimal test data
docs_data = {'pages': []}
github_data = {'apis': {}}
docs_data = {"pages": []}
github_data = {"apis": {}}
# Create three-stream data
code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(
readme='# Test Project\n\nA test project.',
contributing='# Contributing\n\nPull requests welcome.',
readme="# Test Project\n\nA test project.",
contributing="# Contributing\n\nPull requests welcome.",
docs_files=[
{'path': 'docs/quickstart.md', 'content': '# Quick Start'},
{'path': 'docs/api.md', 'content': '# API Reference'}
]
{"path": "docs/quickstart.md", "content": "# Quick Start"},
{"path": "docs/api.md", "content": "# API Reference"},
],
)
insights_stream = InsightsStream(
metadata={
'stars': 2500,
'forks': 123,
'language': 'Python',
'description': 'Test framework'
},
metadata={"stars": 2500, "forks": 123, "language": "Python", "description": "Test framework"},
common_problems=[
{'title': 'Installation fails on Windows', 'number': 150, 'state': 'open', 'comments': 25, 'labels': ['bug', 'windows']},
{'title': 'Memory leak in async mode', 'number': 142, 'state': 'open', 'comments': 18, 'labels': ['bug', 'async']}
{
"title": "Installation fails on Windows",
"number": 150,
"state": "open",
"comments": 25,
"labels": ["bug", "windows"],
},
{
"title": "Memory leak in async mode",
"number": 142,
"state": "open",
"comments": 18,
"labels": ["bug", "async"],
},
],
known_solutions=[
{'title': 'Fixed config loading', 'number': 130, 'state': 'closed', 'comments': 8, 'labels': ['bug']},
{'title': 'Resolved OAuth timeout', 'number': 125, 'state': 'closed', 'comments': 12, 'labels': ['oauth']}
{"title": "Fixed config loading", "number": 130, "state": "closed", "comments": 8, "labels": ["bug"]},
{
"title": "Resolved OAuth timeout",
"number": 125,
"state": "closed",
"comments": 12,
"labels": ["oauth"],
},
],
top_labels=[
{'label': 'bug', 'count': 45},
{'label': 'enhancement', 'count': 20},
{'label': 'question', 'count': 15}
]
{"label": "bug", "count": 45},
{"label": "enhancement", "count": 20},
{"label": "question", "count": 15},
],
)
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
@@ -399,24 +371,24 @@ class TestIntegration:
result = merger.merge_all()
# Verify all layers present
assert 'apis' in result # Layer 1 & 2: Code + Docs
assert 'github_context' in result # Layer 3 & 4: GitHub docs + insights
assert "apis" in result # Layer 1 & 2: Code + Docs
assert "github_context" in result # Layer 3 & 4: GitHub docs + insights
# Verify Layer 3: GitHub docs
gh_context = result['github_context']
assert gh_context['docs']['readme'] == '# Test Project\n\nA test project.'
assert gh_context['docs']['contributing'] == '# Contributing\n\nPull requests welcome.'
assert gh_context['docs']['docs_files_count'] == 2
gh_context = result["github_context"]
assert gh_context["docs"]["readme"] == "# Test Project\n\nA test project."
assert gh_context["docs"]["contributing"] == "# Contributing\n\nPull requests welcome."
assert gh_context["docs"]["docs_files_count"] == 2
# Verify Layer 4: GitHub insights
assert gh_context['metadata']['stars'] == 2500
assert gh_context['metadata']['language'] == 'Python'
assert gh_context['issues']['common_problems_count'] == 2
assert gh_context['issues']['known_solutions_count'] == 2
assert len(gh_context['issues']['top_problems']) == 2
assert len(gh_context['issues']['top_solutions']) == 2
assert len(gh_context['top_labels']) == 3
assert gh_context["metadata"]["stars"] == 2500
assert gh_context["metadata"]["language"] == "Python"
assert gh_context["issues"]["common_problems_count"] == 2
assert gh_context["issues"]["known_solutions_count"] == 2
assert len(gh_context["issues"]["top_problems"]) == 2
assert len(gh_context["issues"]["top_solutions"]) == 2
assert len(gh_context["top_labels"]) == 3
# Verify conflict summary
assert 'conflict_summary' in result
assert result['conflict_summary']['total_conflicts'] == 0
assert "conflict_summary" in result
assert result["conflict_summary"]["total_conflicts"] == 0