Second batch of comprehensive linting fixes: Unused Arguments/Variables (136 errors): - ARG002/ARG001 (91 errors): Prefixed unused method/function arguments with '_' - Interface methods in adaptors (base.py, gemini.py, markdown.py) - AST analyzer methods maintaining signatures (code_analyzer.py) - Test fixtures and hooks (conftest.py) - Added noqa: ARG001/ARG002 for pytest hooks requiring exact names - F841 (45 errors): Prefixed unused local variables with '_' - Tuple unpacking where some values aren't needed - Variables assigned but not referenced Loop & Boolean Quality (28 errors): - B007 (18 errors): Prefixed unused loop control variables with '_' - enumerate() loops where index not used - for-in loops where loop variable not referenced - E712 (10 errors): Simplified boolean comparisons - Changed '== True' to direct boolean check - Changed '== False' to 'not' expression - Improved test readability Code Quality (24 errors): - SIM201 (4 errors): Already fixed in previous commit - SIM118 (2 errors): Already fixed in previous commit - E741 (4 errors): Already fixed in previous commit - Config manager loop variable fix (1 error) All Tests Passing: - test_scraper_features.py: 42 passed - test_integration.py: 51 passed - test_architecture_scenarios.py: 11 passed - test_real_world_fastmcp.py: 19 passed, 1 skipped Note: Some SIM errors (nested if, multiple with) remain unfixed as they would require non-trivial refactoring. Focus was on functional correctness. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
441 lines
16 KiB
Python
441 lines
16 KiB
Python
"""
|
|
Tests for GitHub Three-Stream Fetcher
|
|
|
|
Tests the three-stream architecture that splits GitHub repositories into:
|
|
- Code stream (for C3.x)
|
|
- Docs stream (README, docs/*.md)
|
|
- Insights stream (issues, metadata)
|
|
"""
|
|
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
|
|
from skill_seekers.cli.github_fetcher import (
|
|
CodeStream,
|
|
DocsStream,
|
|
GitHubThreeStreamFetcher,
|
|
InsightsStream,
|
|
ThreeStreamData,
|
|
)
|
|
|
|
|
|
class TestDataClasses:
|
|
"""Test data class definitions."""
|
|
|
|
def test_code_stream(self):
|
|
"""Test CodeStream data class."""
|
|
code_stream = CodeStream(directory=Path("/tmp/repo"), files=[Path("/tmp/repo/src/main.py")])
|
|
assert code_stream.directory == Path("/tmp/repo")
|
|
assert len(code_stream.files) == 1
|
|
|
|
def test_docs_stream(self):
|
|
"""Test DocsStream data class."""
|
|
docs_stream = DocsStream(
|
|
readme="# README",
|
|
contributing="# Contributing",
|
|
docs_files=[{"path": "docs/guide.md", "content": "# Guide"}],
|
|
)
|
|
assert docs_stream.readme == "# README"
|
|
assert docs_stream.contributing == "# Contributing"
|
|
assert len(docs_stream.docs_files) == 1
|
|
|
|
def test_insights_stream(self):
|
|
"""Test InsightsStream data class."""
|
|
insights_stream = InsightsStream(
|
|
metadata={"stars": 1234, "forks": 56},
|
|
common_problems=[{"title": "Bug", "number": 42}],
|
|
known_solutions=[{"title": "Fix", "number": 35}],
|
|
top_labels=[{"label": "bug", "count": 10}],
|
|
)
|
|
assert insights_stream.metadata["stars"] == 1234
|
|
assert len(insights_stream.common_problems) == 1
|
|
assert len(insights_stream.known_solutions) == 1
|
|
assert len(insights_stream.top_labels) == 1
|
|
|
|
def test_three_stream_data(self):
|
|
"""Test ThreeStreamData combination."""
|
|
three_streams = ThreeStreamData(
|
|
code_stream=CodeStream(Path("/tmp"), []),
|
|
docs_stream=DocsStream(None, None, []),
|
|
insights_stream=InsightsStream({}, [], [], []),
|
|
)
|
|
assert isinstance(three_streams.code_stream, CodeStream)
|
|
assert isinstance(three_streams.docs_stream, DocsStream)
|
|
assert isinstance(three_streams.insights_stream, InsightsStream)
|
|
|
|
|
|
class TestGitHubFetcherInit:
|
|
"""Test GitHubThreeStreamFetcher initialization."""
|
|
|
|
def test_parse_https_url(self):
|
|
"""Test parsing HTTPS GitHub URLs."""
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/facebook/react")
|
|
assert fetcher.owner == "facebook"
|
|
assert fetcher.repo == "react"
|
|
|
|
def test_parse_https_url_with_git(self):
|
|
"""Test parsing HTTPS URLs with .git suffix."""
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/facebook/react.git")
|
|
assert fetcher.owner == "facebook"
|
|
assert fetcher.repo == "react"
|
|
|
|
def test_parse_git_url(self):
|
|
"""Test parsing git@ URLs."""
|
|
fetcher = GitHubThreeStreamFetcher("git@github.com:facebook/react.git")
|
|
assert fetcher.owner == "facebook"
|
|
assert fetcher.repo == "react"
|
|
|
|
def test_invalid_url(self):
|
|
"""Test invalid URL raises error."""
|
|
with pytest.raises(ValueError):
|
|
GitHubThreeStreamFetcher("https://invalid.com/repo")
|
|
|
|
@patch.dict("os.environ", {"GITHUB_TOKEN": "test_token"})
|
|
def test_github_token_from_env(self):
|
|
"""Test GitHub token loaded from environment."""
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/facebook/react")
|
|
assert fetcher.github_token == "test_token"
|
|
|
|
|
|
class TestFileClassification:
|
|
"""Test file classification into code vs docs."""
|
|
|
|
def test_classify_files(self, tmp_path):
|
|
"""Test classify_files separates code and docs correctly."""
|
|
# Create test directory structure
|
|
(tmp_path / "src").mkdir()
|
|
(tmp_path / "src" / "main.py").write_text("print('hello')")
|
|
(tmp_path / "src" / "utils.js").write_text("function(){}")
|
|
|
|
(tmp_path / "docs").mkdir()
|
|
(tmp_path / "README.md").write_text("# README")
|
|
(tmp_path / "docs" / "guide.md").write_text("# Guide")
|
|
(tmp_path / "docs" / "api.rst").write_text("API")
|
|
|
|
(tmp_path / "node_modules").mkdir()
|
|
(tmp_path / "node_modules" / "lib.js").write_text("// should be excluded")
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
code_files, doc_files = fetcher.classify_files(tmp_path)
|
|
|
|
# Check code files
|
|
code_paths = [f.name for f in code_files]
|
|
assert "main.py" in code_paths
|
|
assert "utils.js" in code_paths
|
|
assert "lib.js" not in code_paths # Excluded
|
|
|
|
# Check doc files
|
|
doc_paths = [f.name for f in doc_files]
|
|
assert "README.md" in doc_paths
|
|
assert "guide.md" in doc_paths
|
|
assert "api.rst" in doc_paths
|
|
|
|
def test_classify_excludes_hidden_files(self, tmp_path):
|
|
"""Test that hidden files are excluded (except in docs/)."""
|
|
(tmp_path / ".hidden.py").write_text("hidden")
|
|
(tmp_path / "visible.py").write_text("visible")
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
code_files, doc_files = fetcher.classify_files(tmp_path)
|
|
|
|
code_names = [f.name for f in code_files]
|
|
assert ".hidden.py" not in code_names
|
|
assert "visible.py" in code_names
|
|
|
|
def test_classify_various_code_extensions(self, tmp_path):
|
|
"""Test classification of various code file extensions."""
|
|
extensions = [".py", ".js", ".ts", ".go", ".rs", ".java", ".kt", ".rb", ".php"]
|
|
|
|
for ext in extensions:
|
|
(tmp_path / f"file{ext}").write_text("code")
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
code_files, doc_files = fetcher.classify_files(tmp_path)
|
|
|
|
assert len(code_files) == len(extensions)
|
|
|
|
|
|
class TestIssueAnalysis:
|
|
"""Test GitHub issue analysis."""
|
|
|
|
def test_analyze_issues_common_problems(self):
|
|
"""Test extraction of common problems (open issues with 5+ comments)."""
|
|
issues = [
|
|
{
|
|
"title": "OAuth fails",
|
|
"number": 42,
|
|
"state": "open",
|
|
"comments": 10,
|
|
"labels": [{"name": "bug"}, {"name": "oauth"}],
|
|
},
|
|
{
|
|
"title": "Minor issue",
|
|
"number": 43,
|
|
"state": "open",
|
|
"comments": 2, # Too few comments
|
|
"labels": [],
|
|
},
|
|
]
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
insights = fetcher.analyze_issues(issues)
|
|
|
|
assert len(insights["common_problems"]) == 1
|
|
assert insights["common_problems"][0]["number"] == 42
|
|
assert insights["common_problems"][0]["comments"] == 10
|
|
|
|
def test_analyze_issues_known_solutions(self):
|
|
"""Test extraction of known solutions (closed issues with comments)."""
|
|
issues = [
|
|
{
|
|
"title": "Fixed OAuth",
|
|
"number": 35,
|
|
"state": "closed",
|
|
"comments": 5,
|
|
"labels": [{"name": "bug"}],
|
|
},
|
|
{
|
|
"title": "Closed without comments",
|
|
"number": 36,
|
|
"state": "closed",
|
|
"comments": 0, # No comments
|
|
"labels": [],
|
|
},
|
|
]
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
insights = fetcher.analyze_issues(issues)
|
|
|
|
assert len(insights["known_solutions"]) == 1
|
|
assert insights["known_solutions"][0]["number"] == 35
|
|
|
|
def test_analyze_issues_top_labels(self):
|
|
"""Test counting of top issue labels."""
|
|
issues = [
|
|
{"state": "open", "comments": 5, "labels": [{"name": "bug"}, {"name": "oauth"}]},
|
|
{"state": "open", "comments": 5, "labels": [{"name": "bug"}]},
|
|
{"state": "closed", "comments": 3, "labels": [{"name": "enhancement"}]},
|
|
]
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
insights = fetcher.analyze_issues(issues)
|
|
|
|
# Bug should be top label (appears twice)
|
|
assert insights["top_labels"][0]["label"] == "bug"
|
|
assert insights["top_labels"][0]["count"] == 2
|
|
|
|
def test_analyze_issues_limits_to_10(self):
|
|
"""Test that analysis limits results to top 10."""
|
|
issues = [
|
|
{
|
|
"title": f"Issue {i}",
|
|
"number": i,
|
|
"state": "open",
|
|
"comments": 20 - i, # Descending comment count
|
|
"labels": [],
|
|
}
|
|
for i in range(20)
|
|
]
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
insights = fetcher.analyze_issues(issues)
|
|
|
|
assert len(insights["common_problems"]) <= 10
|
|
# Should be sorted by comment count (descending)
|
|
if len(insights["common_problems"]) > 1:
|
|
assert (
|
|
insights["common_problems"][0]["comments"]
|
|
>= insights["common_problems"][1]["comments"]
|
|
)
|
|
|
|
|
|
class TestGitHubAPI:
|
|
"""Test GitHub API interactions."""
|
|
|
|
@patch("requests.get")
|
|
def test_fetch_github_metadata(self, mock_get):
|
|
"""Test fetching repository metadata via GitHub API."""
|
|
mock_response = Mock()
|
|
mock_response.json.return_value = {
|
|
"stargazers_count": 1234,
|
|
"forks_count": 56,
|
|
"open_issues_count": 12,
|
|
"language": "Python",
|
|
"description": "Test repo",
|
|
"homepage": "https://example.com",
|
|
"created_at": "2020-01-01",
|
|
"updated_at": "2024-01-01",
|
|
}
|
|
mock_response.raise_for_status = Mock()
|
|
mock_get.return_value = mock_response
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
metadata = fetcher.fetch_github_metadata()
|
|
|
|
assert metadata["stars"] == 1234
|
|
assert metadata["forks"] == 56
|
|
assert metadata["language"] == "Python"
|
|
|
|
@patch("requests.get")
|
|
def test_fetch_github_metadata_failure(self, mock_get):
|
|
"""Test graceful handling of metadata fetch failure."""
|
|
mock_get.side_effect = Exception("API error")
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
metadata = fetcher.fetch_github_metadata()
|
|
|
|
# Should return default values instead of crashing
|
|
assert metadata["stars"] == 0
|
|
assert metadata["language"] == "Unknown"
|
|
|
|
@patch("requests.get")
|
|
def test_fetch_issues(self, mock_get):
|
|
"""Test fetching issues via GitHub API."""
|
|
mock_response = Mock()
|
|
mock_response.json.return_value = [
|
|
{
|
|
"title": "Bug",
|
|
"number": 42,
|
|
"state": "open",
|
|
"comments": 10,
|
|
"labels": [{"name": "bug"}],
|
|
}
|
|
]
|
|
mock_response.raise_for_status = Mock()
|
|
mock_get.return_value = mock_response
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
issues = fetcher.fetch_issues(max_issues=100)
|
|
|
|
assert len(issues) > 0
|
|
# Should be called twice (open + closed)
|
|
assert mock_get.call_count == 2
|
|
|
|
@patch("requests.get")
|
|
def test_fetch_issues_filters_pull_requests(self, mock_get):
|
|
"""Test that pull requests are filtered out of issues."""
|
|
mock_response = Mock()
|
|
mock_response.json.return_value = [
|
|
{"title": "Issue", "number": 42, "state": "open", "comments": 5, "labels": []},
|
|
{
|
|
"title": "PR",
|
|
"number": 43,
|
|
"state": "open",
|
|
"comments": 3,
|
|
"labels": [],
|
|
"pull_request": {},
|
|
},
|
|
]
|
|
mock_response.raise_for_status = Mock()
|
|
mock_get.return_value = mock_response
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
issues = fetcher.fetch_issues(max_issues=100)
|
|
|
|
# Should only include the issue, not the PR
|
|
assert all("pull_request" not in issue for issue in issues)
|
|
|
|
|
|
class TestReadFile:
|
|
"""Test file reading utilities."""
|
|
|
|
def test_read_file_success(self, tmp_path):
|
|
"""Test successful file reading."""
|
|
test_file = tmp_path / "test.txt"
|
|
test_file.write_text("Hello, world!")
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
content = fetcher.read_file(test_file)
|
|
|
|
assert content == "Hello, world!"
|
|
|
|
def test_read_file_not_found(self, tmp_path):
|
|
"""Test reading non-existent file returns None."""
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
content = fetcher.read_file(tmp_path / "missing.txt")
|
|
|
|
assert content is None
|
|
|
|
def test_read_file_encoding_fallback(self, tmp_path):
|
|
"""Test fallback to latin-1 encoding if UTF-8 fails."""
|
|
test_file = tmp_path / "test.txt"
|
|
# Write bytes that are invalid UTF-8 but valid latin-1
|
|
test_file.write_bytes(b"\xff\xfe")
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo")
|
|
content = fetcher.read_file(test_file)
|
|
|
|
# Should still read successfully with latin-1
|
|
assert content is not None
|
|
|
|
|
|
class TestIntegration:
|
|
"""Integration tests for complete three-stream fetching."""
|
|
|
|
@patch("subprocess.run")
|
|
@patch("requests.get")
|
|
def test_fetch_integration(self, mock_get, mock_run, tmp_path):
|
|
"""Test complete fetch() integration."""
|
|
# Mock git clone
|
|
mock_run.return_value = Mock(returncode=0, stderr="")
|
|
|
|
# Mock GitHub API calls
|
|
def api_side_effect(*args, **_kwargs):
|
|
url = args[0]
|
|
mock_response = Mock()
|
|
mock_response.raise_for_status = Mock()
|
|
|
|
if "repos/" in url and "/issues" not in url:
|
|
# Metadata call
|
|
mock_response.json.return_value = {
|
|
"stargazers_count": 1234,
|
|
"forks_count": 56,
|
|
"open_issues_count": 12,
|
|
"language": "Python",
|
|
}
|
|
else:
|
|
# Issues call
|
|
mock_response.json.return_value = [
|
|
{
|
|
"title": "Test Issue",
|
|
"number": 42,
|
|
"state": "open",
|
|
"comments": 10,
|
|
"labels": [{"name": "bug"}],
|
|
}
|
|
]
|
|
return mock_response
|
|
|
|
mock_get.side_effect = api_side_effect
|
|
|
|
# Create test repo structure
|
|
repo_dir = tmp_path / "repo"
|
|
repo_dir.mkdir()
|
|
(repo_dir / "src").mkdir()
|
|
(repo_dir / "src" / "main.py").write_text("print('hello')")
|
|
(repo_dir / "README.md").write_text("# README")
|
|
|
|
fetcher = GitHubThreeStreamFetcher("https://github.com/test/repo", interactive=False)
|
|
|
|
# Mock clone to use our tmp_path
|
|
with patch.object(fetcher, "clone_repo", return_value=repo_dir):
|
|
three_streams = fetcher.fetch()
|
|
|
|
# Verify all 3 streams present
|
|
assert three_streams.code_stream is not None
|
|
assert three_streams.docs_stream is not None
|
|
assert three_streams.insights_stream is not None
|
|
|
|
# Verify code stream
|
|
assert len(three_streams.code_stream.files) > 0
|
|
|
|
# Verify docs stream
|
|
assert three_streams.docs_stream.readme is not None
|
|
assert "# README" in three_streams.docs_stream.readme
|
|
|
|
# Verify insights stream
|
|
assert three_streams.insights_stream.metadata["stars"] == 1234
|
|
assert len(three_streams.insights_stream.common_problems) > 0
|