This commit is contained in:
Pablo Estevez
2026-01-17 17:29:21 +00:00
parent c89f059712
commit 5ed767ff9a
144 changed files with 14142 additions and 16488 deletions

View File

@@ -7,16 +7,17 @@ Tests the three-stream architecture that splits GitHub repositories into:
- Insights stream (issues, metadata)
"""
import pytest
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
from unittest.mock import Mock, patch
import pytest
from skill_seekers.cli.github_fetcher import (
CodeStream,
DocsStream,
GitHubThreeStreamFetcher,
InsightsStream,
ThreeStreamData,
GitHubThreeStreamFetcher
)
@@ -25,10 +26,7 @@ class TestDataClasses:
def test_code_stream(self):
"""Test CodeStream data class."""
code_stream = CodeStream(
directory=Path("/tmp/repo"),
files=[Path("/tmp/repo/src/main.py")]
)
code_stream = CodeStream(directory=Path("/tmp/repo"), files=[Path("/tmp/repo/src/main.py")])
assert code_stream.directory == Path("/tmp/repo")
assert len(code_stream.files) == 1
@@ -37,7 +35,7 @@ class TestDataClasses:
docs_stream = DocsStream(
readme="# README",
contributing="# Contributing",
docs_files=[{"path": "docs/guide.md", "content": "# Guide"}]
docs_files=[{"path": "docs/guide.md", "content": "# Guide"}],
)
assert docs_stream.readme == "# README"
assert docs_stream.contributing == "# Contributing"
@@ -49,7 +47,7 @@ class TestDataClasses:
metadata={"stars": 1234, "forks": 56},
common_problems=[{"title": "Bug", "number": 42}],
known_solutions=[{"title": "Fix", "number": 35}],
top_labels=[{"label": "bug", "count": 10}]
top_labels=[{"label": "bug", "count": 10}],
)
assert insights_stream.metadata["stars"] == 1234
assert len(insights_stream.common_problems) == 1
@@ -61,7 +59,7 @@ class TestDataClasses:
three_streams = ThreeStreamData(
code_stream=CodeStream(Path("/tmp"), []),
docs_stream=DocsStream(None, None, []),
insights_stream=InsightsStream({}, [], [], [])
insights_stream=InsightsStream({}, [], [], []),
)
assert isinstance(three_streams.code_stream, CodeStream)
assert isinstance(three_streams.docs_stream, DocsStream)
@@ -94,11 +92,11 @@ class TestGitHubFetcherInit:
with pytest.raises(ValueError):
GitHubThreeStreamFetcher("https://invalid.com/repo")
@patch.dict('os.environ', {'GITHUB_TOKEN': 'test_token'})
@patch.dict("os.environ", {"GITHUB_TOKEN": "test_token"})
def test_github_token_from_env(self):
"""Test GitHub token loaded from environment."""
fetcher = GitHubThreeStreamFetcher("https://github.com/facebook/react")
assert fetcher.github_token == 'test_token'
assert fetcher.github_token == "test_token"
class TestFileClassification:
@@ -148,7 +146,7 @@ class TestFileClassification:
def test_classify_various_code_extensions(self, tmp_path):
"""Test classification of various code file extensions."""
extensions = ['.py', '.js', '.ts', '.go', '.rs', '.java', '.kt', '.rb', '.php']
extensions = [".py", ".js", ".ts", ".go", ".rs", ".java", ".kt", ".rb", ".php"]
for ext in extensions:
(tmp_path / f"file{ext}").write_text("code")
@@ -166,77 +164,71 @@ class TestIssueAnalysis:
"""Test extraction of common problems (open issues with 5+ comments)."""
issues = [
{
'title': 'OAuth fails',
'number': 42,
'state': 'open',
'comments': 10,
'labels': [{'name': 'bug'}, {'name': 'oauth'}]
"title": "OAuth fails",
"number": 42,
"state": "open",
"comments": 10,
"labels": [{"name": "bug"}, {"name": "oauth"}],
},
{
'title': 'Minor issue',
'number': 43,
'state': 'open',
'comments': 2, # Too few comments
'labels': []
}
"title": "Minor issue",
"number": 43,
"state": "open",
"comments": 2, # Too few comments
"labels": [],
},
]
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
insights = fetcher.analyze_issues(issues)
assert len(insights['common_problems']) == 1
assert insights['common_problems'][0]['number'] == 42
assert insights['common_problems'][0]['comments'] == 10
assert len(insights["common_problems"]) == 1
assert insights["common_problems"][0]["number"] == 42
assert insights["common_problems"][0]["comments"] == 10
def test_analyze_issues_known_solutions(self):
"""Test extraction of known solutions (closed issues with comments)."""
issues = [
{"title": "Fixed OAuth", "number": 35, "state": "closed", "comments": 5, "labels": [{"name": "bug"}]},
{
'title': 'Fixed OAuth',
'number': 35,
'state': 'closed',
'comments': 5,
'labels': [{'name': 'bug'}]
"title": "Closed without comments",
"number": 36,
"state": "closed",
"comments": 0, # No comments
"labels": [],
},
{
'title': 'Closed without comments',
'number': 36,
'state': 'closed',
'comments': 0, # No comments
'labels': []
}
]
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
insights = fetcher.analyze_issues(issues)
assert len(insights['known_solutions']) == 1
assert insights['known_solutions'][0]['number'] == 35
assert len(insights["known_solutions"]) == 1
assert insights["known_solutions"][0]["number"] == 35
def test_analyze_issues_top_labels(self):
"""Test counting of top issue labels."""
issues = [
{'state': 'open', 'comments': 5, 'labels': [{'name': 'bug'}, {'name': 'oauth'}]},
{'state': 'open', 'comments': 5, 'labels': [{'name': 'bug'}]},
{'state': 'closed', 'comments': 3, 'labels': [{'name': 'enhancement'}]}
{"state": "open", "comments": 5, "labels": [{"name": "bug"}, {"name": "oauth"}]},
{"state": "open", "comments": 5, "labels": [{"name": "bug"}]},
{"state": "closed", "comments": 3, "labels": [{"name": "enhancement"}]},
]
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
insights = fetcher.analyze_issues(issues)
# Bug should be top label (appears twice)
assert insights['top_labels'][0]['label'] == 'bug'
assert insights['top_labels'][0]['count'] == 2
assert insights["top_labels"][0]["label"] == "bug"
assert insights["top_labels"][0]["count"] == 2
def test_analyze_issues_limits_to_10(self):
"""Test that analysis limits results to top 10."""
issues = [
{
'title': f'Issue {i}',
'number': i,
'state': 'open',
'comments': 20 - i, # Descending comment count
'labels': []
"title": f"Issue {i}",
"number": i,
"state": "open",
"comments": 20 - i, # Descending comment count
"labels": [],
}
for i in range(20)
]
@@ -244,28 +236,28 @@ class TestIssueAnalysis:
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
insights = fetcher.analyze_issues(issues)
assert len(insights['common_problems']) <= 10
assert len(insights["common_problems"]) <= 10
# Should be sorted by comment count (descending)
if len(insights['common_problems']) > 1:
assert insights['common_problems'][0]['comments'] >= insights['common_problems'][1]['comments']
if len(insights["common_problems"]) > 1:
assert insights["common_problems"][0]["comments"] >= insights["common_problems"][1]["comments"]
class TestGitHubAPI:
"""Test GitHub API interactions."""
@patch('requests.get')
@patch("requests.get")
def test_fetch_github_metadata(self, mock_get):
"""Test fetching repository metadata via GitHub API."""
mock_response = Mock()
mock_response.json.return_value = {
'stargazers_count': 1234,
'forks_count': 56,
'open_issues_count': 12,
'language': 'Python',
'description': 'Test repo',
'homepage': 'https://example.com',
'created_at': '2020-01-01',
'updated_at': '2024-01-01'
"stargazers_count": 1234,
"forks_count": 56,
"open_issues_count": 12,
"language": "Python",
"description": "Test repo",
"homepage": "https://example.com",
"created_at": "2020-01-01",
"updated_at": "2024-01-01",
}
mock_response.raise_for_status = Mock()
mock_get.return_value = mock_response
@@ -273,11 +265,11 @@ class TestGitHubAPI:
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
metadata = fetcher.fetch_github_metadata()
assert metadata['stars'] == 1234
assert metadata['forks'] == 56
assert metadata['language'] == 'Python'
assert metadata["stars"] == 1234
assert metadata["forks"] == 56
assert metadata["language"] == "Python"
@patch('requests.get')
@patch("requests.get")
def test_fetch_github_metadata_failure(self, mock_get):
"""Test graceful handling of metadata fetch failure."""
mock_get.side_effect = Exception("API error")
@@ -286,21 +278,15 @@ class TestGitHubAPI:
metadata = fetcher.fetch_github_metadata()
# Should return default values instead of crashing
assert metadata['stars'] == 0
assert metadata['language'] == 'Unknown'
assert metadata["stars"] == 0
assert metadata["language"] == "Unknown"
@patch('requests.get')
@patch("requests.get")
def test_fetch_issues(self, mock_get):
"""Test fetching issues via GitHub API."""
mock_response = Mock()
mock_response.json.return_value = [
{
'title': 'Bug',
'number': 42,
'state': 'open',
'comments': 10,
'labels': [{'name': 'bug'}]
}
{"title": "Bug", "number": 42, "state": "open", "comments": 10, "labels": [{"name": "bug"}]}
]
mock_response.raise_for_status = Mock()
mock_get.return_value = mock_response
@@ -312,13 +298,13 @@ class TestGitHubAPI:
# Should be called twice (open + closed)
assert mock_get.call_count == 2
@patch('requests.get')
@patch("requests.get")
def test_fetch_issues_filters_pull_requests(self, mock_get):
"""Test that pull requests are filtered out of issues."""
mock_response = Mock()
mock_response.json.return_value = [
{'title': 'Issue', 'number': 42, 'state': 'open', 'comments': 5, 'labels': []},
{'title': 'PR', 'number': 43, 'state': 'open', 'comments': 3, 'labels': [], 'pull_request': {}}
{"title": "Issue", "number": 42, "state": "open", "comments": 5, "labels": []},
{"title": "PR", "number": 43, "state": "open", "comments": 3, "labels": [], "pull_request": {}},
]
mock_response.raise_for_status = Mock()
mock_get.return_value = mock_response
@@ -327,7 +313,7 @@ class TestGitHubAPI:
issues = fetcher.fetch_issues(max_issues=100)
# Should only include the issue, not the PR
assert all('pull_request' not in issue for issue in issues)
assert all("pull_request" not in issue for issue in issues)
class TestReadFile:
@@ -354,7 +340,7 @@ class TestReadFile:
"""Test fallback to latin-1 encoding if UTF-8 fails."""
test_file = tmp_path / "test.txt"
# Write bytes that are invalid UTF-8 but valid latin-1
test_file.write_bytes(b'\xff\xfe')
test_file.write_bytes(b"\xff\xfe")
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
content = fetcher.read_file(test_file)
@@ -366,8 +352,8 @@ class TestReadFile:
class TestIntegration:
"""Integration tests for complete three-stream fetching."""
@patch('subprocess.run')
@patch('requests.get')
@patch("subprocess.run")
@patch("requests.get")
def test_fetch_integration(self, mock_get, mock_run, tmp_path):
"""Test complete fetch() integration."""
# Mock git clone
@@ -379,24 +365,18 @@ class TestIntegration:
mock_response = Mock()
mock_response.raise_for_status = Mock()
if 'repos/' in url and '/issues' not in url:
if "repos/" in url and "/issues" not in url:
# Metadata call
mock_response.json.return_value = {
'stargazers_count': 1234,
'forks_count': 56,
'open_issues_count': 12,
'language': 'Python'
"stargazers_count": 1234,
"forks_count": 56,
"open_issues_count": 12,
"language": "Python",
}
else:
# Issues call
mock_response.json.return_value = [
{
'title': 'Test Issue',
'number': 42,
'state': 'open',
'comments': 10,
'labels': [{'name': 'bug'}]
}
{"title": "Test Issue", "number": 42, "state": "open", "comments": 10, "labels": [{"name": "bug"}]}
]
return mock_response
@@ -412,7 +392,7 @@ class TestIntegration:
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
# Mock clone to use our tmp_path
with patch.object(fetcher, 'clone_repo', return_value=repo_dir):
with patch.object(fetcher, "clone_repo", return_value=repo_dir):
three_streams = fetcher.fetch()
# Verify all 3 streams present
@@ -428,5 +408,5 @@ class TestIntegration:
assert "# README" in three_streams.docs_stream.readme
# Verify insights stream
assert three_streams.insights_stream.metadata['stars'] == 1234
assert three_streams.insights_stream.metadata["stars"] == 1234
assert len(three_streams.insights_stream.common_problems) > 0