- Fixed formatting to comply with ruff standards - No functional changes, only formatting/style - Completes CI/CD pipeline formatting requirements
334 lines
12 KiB
Python
334 lines
12 KiB
Python
"""Tests for source type detection.

Tests the SourceDetector class's ability to identify and parse:
- Web URLs
- GitHub repositories
- Local directories
- PDF files
- Config files
"""
|
|
|
|
import os
|
|
import pytest
|
|
|
|
from skill_seekers.cli.source_detector import SourceDetector, SourceInfo
|
|
|
|
|
|
class TestWebDetection:
    """Inputs that SourceDetector should classify as the ``web`` type."""

    def test_detect_full_https_url(self):
        """An https:// URL is typed as web and stored unchanged."""
        url = "https://docs.react.dev/"
        result = SourceDetector.detect(url)

        assert result.type == "web"
        assert result.parsed["url"] == "https://docs.react.dev/"
        assert result.suggested_name == "react"

    def test_detect_full_http_url(self):
        """A plain http:// URL is typed as web and stored unchanged."""
        url = "http://example.com/docs"
        result = SourceDetector.detect(url)

        assert result.type == "web"
        assert result.parsed["url"] == "http://example.com/docs"

    def test_detect_domain_only(self):
        """A bare domain is typed as web and gains an https:// prefix."""
        bare_domain = "docs.react.dev"
        result = SourceDetector.detect(bare_domain)

        assert result.type == "web"
        assert result.parsed["url"] == "https://docs.react.dev"
        assert result.suggested_name == "react"

    def test_detect_complex_url(self):
        """A URL carrying a multi-segment path is still typed as web."""
        url = "https://docs.python.org/3/library/"
        result = SourceDetector.detect(url)

        assert result.type == "web"
        assert result.parsed["url"] == "https://docs.python.org/3/library/"
        assert result.suggested_name == "python"

    def test_suggested_name_removes_www(self):
        """The suggested name must not carry the leading ``www.`` label."""
        result = SourceDetector.detect("https://www.example.com/")

        assert result.type == "web"
        assert result.suggested_name == "example"

    def test_suggested_name_removes_docs(self):
        """The suggested name must not carry the leading ``docs.`` label."""
        result = SourceDetector.detect("https://docs.vue.org/")

        assert result.type == "web"
        assert result.suggested_name == "vue"
|
|
|
|
|
|
class TestGitHubDetection:
    """Inputs that SourceDetector should classify as the ``github`` type."""

    def test_detect_owner_repo_format(self):
        """The ``owner/repo`` shorthand is typed as github."""
        shorthand = "facebook/react"
        result = SourceDetector.detect(shorthand)

        assert result.type == "github"
        assert result.parsed["repo"] == "facebook/react"
        assert result.suggested_name == "react"

    def test_detect_github_https_url(self):
        """A full https://github.com URL is reduced to ``owner/repo``."""
        result = SourceDetector.detect("https://github.com/facebook/react")

        assert result.type == "github"
        assert result.parsed["repo"] == "facebook/react"
        assert result.suggested_name == "react"

    def test_detect_github_url_with_git_suffix(self):
        """A trailing ``.git`` is dropped from the parsed repo."""
        result = SourceDetector.detect("https://github.com/facebook/react.git")

        assert result.type == "github"
        assert result.parsed["repo"] == "facebook/react"
        assert result.suggested_name == "react"

    def test_detect_github_url_without_protocol(self):
        """A github.com URL lacking a scheme is still typed as github."""
        result = SourceDetector.detect("github.com/vuejs/vue")

        assert result.type == "github"
        assert result.parsed["repo"] == "vuejs/vue"
        assert result.suggested_name == "vue"

    def test_owner_repo_with_dots_and_dashes(self):
        """A repo name containing a dash is accepted and kept intact."""
        shorthand = "microsoft/vscode-python"
        result = SourceDetector.detect(shorthand)

        assert result.type == "github"
        assert result.parsed["repo"] == "microsoft/vscode-python"
        assert result.suggested_name == "vscode-python"
|
|
|
|
|
|
class TestLocalDetection:
    """Test local directory detection."""

    def test_detect_relative_directory(self, tmp_path, monkeypatch):
        """Relative directory path should be detected.

        The working directory is switched to ``tmp_path`` so that
        ``./my_project`` resolves to the directory created here.
        """
        # Create a test directory
        test_dir = tmp_path / "my_project"
        test_dir.mkdir()

        # monkeypatch.chdir restores the previous working directory at
        # teardown, even if an assertion below fails, so no manual
        # try/finally bookkeeping is needed.
        monkeypatch.chdir(tmp_path)

        info = SourceDetector.detect("./my_project")
        assert info.type == "local"
        assert "my_project" in info.parsed["directory"]
        assert info.suggested_name == "my_project"

    def test_detect_absolute_directory(self, tmp_path):
        """Absolute directory path should be detected."""
        # Create a test directory
        test_dir = tmp_path / "test_repo"
        test_dir.mkdir()

        info = SourceDetector.detect(str(test_dir))
        assert info.type == "local"
        assert info.parsed["directory"] == str(test_dir.resolve())
        assert info.suggested_name == "test_repo"

    def test_detect_current_directory(self):
        """Current directory (.) should be detected."""
        cwd = os.getcwd()
        info = SourceDetector.detect(".")
        assert info.type == "local"
        assert info.parsed["directory"] == cwd
|
|
|
|
|
|
class TestPDFDetection:
    """Inputs that SourceDetector should classify as the ``pdf`` type."""

    def test_detect_pdf_extension(self):
        """A bare filename ending in ``.pdf`` is typed as pdf."""
        filename = "tutorial.pdf"
        result = SourceDetector.detect(filename)

        assert result.type == "pdf"
        assert result.parsed["file_path"] == "tutorial.pdf"
        assert result.suggested_name == "tutorial"

    def test_detect_pdf_with_path(self):
        """A ``.pdf`` file given with a directory prefix keeps its full path."""
        pdf_path = "/path/to/guide.pdf"
        result = SourceDetector.detect(pdf_path)

        assert result.type == "pdf"
        assert result.parsed["file_path"] == "/path/to/guide.pdf"
        assert result.suggested_name == "guide"

    def test_suggested_name_removes_pdf_extension(self):
        """The suggested name is the filename without its ``.pdf`` suffix."""
        result = SourceDetector.detect("my-awesome-guide.pdf")

        assert result.type == "pdf"
        assert result.suggested_name == "my-awesome-guide"
|
|
|
|
|
|
class TestConfigDetection:
    """Inputs that SourceDetector should classify as the ``config`` type."""

    def test_detect_json_extension(self):
        """A bare filename ending in ``.json`` is typed as config."""
        filename = "react.json"
        result = SourceDetector.detect(filename)

        assert result.type == "config"
        assert result.parsed["config_path"] == "react.json"
        assert result.suggested_name == "react"

    def test_detect_config_with_path(self):
        """A ``.json`` file given with a directory prefix keeps its path."""
        config_path = "configs/django.json"
        result = SourceDetector.detect(config_path)

        assert result.type == "config"
        assert result.parsed["config_path"] == "configs/django.json"
        assert result.suggested_name == "django"
|
|
|
|
|
|
class TestValidation:
    """Test source validation.

    The "nonexistent" cases build ``SourceInfo`` directly (rather than going
    through ``SourceDetector.detect``) and point it at never-created children
    of ``tmp_path``, so the paths are guaranteed to be absent on any platform.
    """

    def test_validate_existing_directory(self, tmp_path):
        """Validation should pass for existing directory."""
        test_dir = tmp_path / "exists"
        test_dir.mkdir()

        info = SourceDetector.detect(str(test_dir))
        # Should not raise
        SourceDetector.validate_source(info)

    def test_validate_nonexistent_directory(self, tmp_path):
        """Validation should fail for nonexistent directory."""
        # A child of tmp_path that is never created cannot exist.
        nonexistent = str(tmp_path / "definitely_does_not_exist_12345")
        info = SourceInfo(
            type="local",
            parsed={"directory": nonexistent},
            suggested_name="test",
            raw_input=nonexistent,
        )

        # Keep only the call under test inside pytest.raises so an error
        # raised while building the fixture cannot satisfy the expectation.
        with pytest.raises(ValueError, match="Directory does not exist"):
            SourceDetector.validate_source(info)

    def test_validate_existing_pdf(self, tmp_path):
        """Validation should pass for existing PDF."""
        pdf_file = tmp_path / "test.pdf"
        pdf_file.touch()

        info = SourceDetector.detect(str(pdf_file))
        # Should not raise
        SourceDetector.validate_source(info)

    def test_validate_nonexistent_pdf(self, tmp_path):
        """Validation should fail for nonexistent PDF."""
        # A child of tmp_path that is never created cannot exist.
        missing_pdf = str(tmp_path / "nonexistent.pdf")
        info = SourceInfo(
            type="pdf",
            parsed={"file_path": missing_pdf},
            suggested_name="test",
            raw_input=missing_pdf,
        )

        # Only the call under test belongs inside pytest.raises.
        with pytest.raises(ValueError, match="PDF file does not exist"):
            SourceDetector.validate_source(info)

    def test_validate_existing_config(self, tmp_path):
        """Validation should pass for existing config."""
        config_file = tmp_path / "test.json"
        config_file.touch()

        info = SourceDetector.detect(str(config_file))
        # Should not raise
        SourceDetector.validate_source(info)

    def test_validate_nonexistent_config(self, tmp_path):
        """Validation should fail for nonexistent config."""
        # A child of tmp_path that is never created cannot exist.
        missing_config = str(tmp_path / "nonexistent.json")
        info = SourceInfo(
            type="config",
            parsed={"config_path": missing_config},
            suggested_name="test",
            raw_input=missing_config,
        )

        # Only the call under test belongs inside pytest.raises.
        with pytest.raises(ValueError, match="Config file does not exist"):
            SourceDetector.validate_source(info)
|
|
|
|
|
|
class TestAmbiguousCases:
    """Inputs that could plausibly match more than one source type, or none."""

    def test_invalid_input_raises_error(self):
        """Unclassifiable input raises ValueError with a usage-style message."""
        with pytest.raises(ValueError) as raised:
            SourceDetector.detect("invalid_input_without_dots_or_slashes")

        message = str(raised.value)
        assert "Cannot determine source type" in message
        assert "Examples:" in message
        assert "skill-seekers create" in message

    def test_github_takes_precedence_over_web(self):
        """A github.com URL is typed as github rather than as a generic web URL."""
        github_url = "https://github.com/owner/repo"
        result = SourceDetector.detect(github_url)

        assert result.type == "github"
        assert result.parsed["repo"] == "owner/repo"

    def test_directory_takes_precedence_over_domain(self, tmp_path):
        """An existing directory named like a domain is typed as local."""
        # The directory name deliberately resembles a web domain.
        domain_named_dir = tmp_path / "example.com"
        domain_named_dir.mkdir()

        result = SourceDetector.detect(str(domain_named_dir))

        # Expect the local type here, not web.
        assert result.type == "local"
|
|
|
|
|
|
class TestRawInputPreservation:
    """``raw_input`` on the detection result must equal the string passed in."""

    def test_raw_input_preserved_for_web(self):
        """A web URL is echoed back unchanged in ``raw_input``."""
        given = "https://docs.python.org/"
        result = SourceDetector.detect(given)

        assert result.raw_input == given

    def test_raw_input_preserved_for_github(self):
        """An ``owner/repo`` shorthand is echoed back unchanged in ``raw_input``."""
        given = "facebook/react"
        result = SourceDetector.detect(given)

        assert result.raw_input == given

    def test_raw_input_preserved_for_local(self, tmp_path):
        """A directory path string is echoed back unchanged in ``raw_input``."""
        local_dir = tmp_path / "test"
        local_dir.mkdir()
        given = str(local_dir)

        result = SourceDetector.detect(given)

        assert result.raw_input == given
|
|
|
|
|
|
class TestEdgeCases:
    """Assorted boundary inputs for source detection."""

    def test_trailing_slash_in_url(self):
        """The same URL with and without a trailing slash is typed as web."""
        with_slash = SourceDetector.detect("https://docs.react.dev/")
        without_slash = SourceDetector.detect("https://docs.react.dev")

        for result in (with_slash, without_slash):
            assert result.type == "web"

    def test_uppercase_in_github_repo(self):
        """Mixed-case ``owner/repo`` is typed as github with its case kept."""
        shorthand = "Microsoft/TypeScript"
        result = SourceDetector.detect(shorthand)

        assert result.type == "github"
        assert result.parsed["repo"] == "Microsoft/TypeScript"

    def test_numbers_in_repo_name(self):
        """A repo name containing digits and a dot is typed as github."""
        result = SourceDetector.detect("python/cpython3.11")

        assert result.type == "github"

    def test_nested_directory_path(self, tmp_path):
        """A multi-level directory is typed as local and named after its leaf."""
        deepest = tmp_path / "a" / "b" / "c"
        deepest.mkdir(parents=True)

        result = SourceDetector.detect(str(deepest))

        assert result.type == "local"
        assert result.suggested_name == "c"
|