Files
skill-seekers-reference/pyproject.toml
yusyus 4f9a5a553b feat: Phase 2 - Real upload capabilities for ChromaDB and Weaviate
Implemented complete upload functionality for vector databases, replacing
stub implementations with real upload capabilities including embedding
generation, multiple connection modes, and comprehensive error handling.

## ChromaDB Upload (chroma.py)
-  Multiple connection modes (PersistentClient, HttpClient)
-  3 embedding strategies (OpenAI, sentence-transformers, default)
-  Batch processing (100 docs per batch)
-  Progress tracking for large uploads
-  Collection management (create if not exists)

## Weaviate Upload (weaviate.py)
-  Local and cloud connections
-  Schema management (auto-create)
-  Batch upload with progress tracking
-  OpenAI embedding support

## Upload Command (upload_skill.py)
-  Added 8 new CLI arguments for vector DBs
-  Platform-specific kwargs handling
-  Enhanced output formatting (collection/class names)
-  Backward compatibility (LLM platforms unchanged)

## Dependencies (pyproject.toml)
-  Added 4 optional dependency groups:
  - chroma = ["chromadb>=0.4.0"]
  - weaviate = ["weaviate-client>=3.25.0"]
  - sentence-transformers = ["sentence-transformers>=2.2.0"]
  - rag-upload = [all vector DB deps]

## Testing (test_upload_integration.py)
-  15 new tests across 4 test classes
-  Works without optional dependencies installed
-  Error handling tests (missing files, invalid JSON)
-  Fixed 2 existing tests (chroma/weaviate adaptors)
-  37/37 tests passing

## User-Facing Examples

Local ChromaDB:
  skill-seekers upload output/react-chroma.json --target chroma \
    --persist-directory ./chroma_db

Weaviate Cloud:
  skill-seekers upload output/react-weaviate.json --target weaviate \
    --use-cloud --cluster-url https://xxx.weaviate.network

With OpenAI embeddings:
  skill-seekers upload output/react-chroma.json --target chroma \
    --embedding-function openai --openai-api-key $OPENAI_API_KEY

## Files Changed
- src/skill_seekers/cli/adaptors/chroma.py (250 lines)
- src/skill_seekers/cli/adaptors/weaviate.py (200 lines)
- src/skill_seekers/cli/upload_skill.py (50 lines)
- pyproject.toml (15 lines)
- tests/test_upload_integration.py (NEW - 293 lines)
- tests/test_adaptors/test_chroma_adaptor.py (1 line)
- tests/test_adaptors/test_weaviate_adaptor.py (1 line)

Total: 7 files, ~810 lines added/modified

See PHASE2_COMPLETION_SUMMARY.md for detailed documentation.

Time: ~7 hours (estimated 6-8h)
Status: COMPLETE - Ready for Phase 3

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-08 01:30:04 +03:00

304 lines
8.4 KiB
TOML

# Build configuration: setuptools is the backend that produces the sdist/wheel.
[build-system]
requires = [
    "setuptools>=61.0",
    "wheel",
]
build-backend = "setuptools.build_meta"
# Core package metadata (PEP 621).
[project]
name = "skill-seekers"
version = "2.9.0"
description = "Convert documentation websites, GitHub repositories, and PDFs into Claude AI skills. International support with Chinese (简体中文) documentation."
readme = "README.md"
requires-python = ">=3.10"
license = { text = "MIT" }
authors = [{ name = "Yusuf Karaaslan" }]
keywords = [
    "claude",
    "ai",
    "documentation",
    "scraping",
    "skills",
    "llm",
    "mcp",
    "automation",
    "i18n",
    "chinese",
    "international",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Software Development :: Documentation",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Text Processing :: Markup :: Markdown",
    "Natural Language :: English",
    "Natural Language :: Chinese (Simplified)",
]

# Core dependencies: installed with every install of the package,
# independent of any extra.
dependencies = [
    "requests>=2.32.5",
    "beautifulsoup4>=4.14.2",
    "PyGithub>=2.5.0",
    "GitPython>=3.1.40",
    # Required for async scraping (core feature)
    "httpx>=0.28.1",
    # Required for AI enhancement (core feature)
    "anthropic>=0.76.0",
    "PyMuPDF>=1.24.14",
    "Pillow>=11.0.0",
    "pytesseract>=0.3.13",
    "pydantic>=2.12.3",
    "pydantic-settings>=2.11.0",
    "python-dotenv>=1.1.1",
    "jsonschema>=4.25.1",
    "click>=8.3.0",
    "Pygments>=2.19.2",
    "pathspec>=0.12.1",
    "networkx>=3.0",
    # TOML parser for version reading; only pulled in on Python < 3.11
    "tomli>=2.0.0; python_version < '3.11'",
    # Required for sync monitoring
    "schedule>=1.2.0",
]
# Optional feature sets ("extras"). Each key is installed on request with
# `pip install "skill-seekers[<extra>]"`. The combined extras (`all-llms`,
# `all-cloud`, `rag-upload`, `all`) repeat the requirement strings of the
# extras they bundle, so a version floor changed in one place must be changed
# in every list that names the same package.
[project.optional-dependencies]

# MCP server dependencies (optional; not part of the core install)
mcp = [
    "mcp>=1.25,<2",
    "httpx>=0.28.1",
    "httpx-sse>=0.4.3",
    "uvicorn>=0.38.0",
    "starlette>=0.48.0",
    "sse-starlette>=3.0.2",
]

# LLM platform-specific dependencies
# Google Gemini support
gemini = [
    "google-generativeai>=0.8.0",
]
# OpenAI ChatGPT support
openai = [
    "openai>=1.0.0",
]
# All LLM platforms combined
all-llms = [
    "google-generativeai>=0.8.0",
    "openai>=1.0.0",
]

# Cloud storage support
s3 = [
    "boto3>=1.34.0",
]
gcs = [
    "google-cloud-storage>=2.10.0",
]
azure = [
    "azure-storage-blob>=12.19.0",
]
# All cloud storage providers combined
all-cloud = [
    "boto3>=1.34.0",
    "google-cloud-storage>=2.10.0",
    "azure-storage-blob>=12.19.0",
]

# RAG vector database upload support
chroma = [
    "chromadb>=0.4.0",
]
# NOTE(review): there is no upper bound here, so weaviate-client 4.x can be
# selected; its client API differs from 3.x. Confirm the Weaviate adaptor
# works with 4.x, or cap the requirement (e.g. "<4").
weaviate = [
    "weaviate-client>=3.25.0",
]
# The floor matches the `embedding` and `all` extras (>=2.3.0) so that every
# extra naming sentence-transformers resolves the same minimum version.
sentence-transformers = [
    "sentence-transformers>=2.3.0",
]
# All vector database upload dependencies combined
rag-upload = [
    "chromadb>=0.4.0",
    "weaviate-client>=3.25.0",
    "sentence-transformers>=2.3.0",
]

# Embedding server support
embedding = [
    "fastapi>=0.109.0",
    "uvicorn>=0.27.0",
    "sentence-transformers>=2.3.0",
    "numpy>=1.24.0",
    "voyageai>=0.2.0",
]

# All optional dependencies combined (dev dependencies now in [dependency-groups]).
# Where two extras name the same package with different floors (uvicorn),
# the higher floor is listed once.
all = [
    "mcp>=1.25,<2",
    "httpx>=0.28.1",
    "httpx-sse>=0.4.3",
    "uvicorn>=0.38.0",
    "starlette>=0.48.0",
    "sse-starlette>=3.0.2",
    "google-generativeai>=0.8.0",
    "openai>=1.0.0",
    "boto3>=1.34.0",
    "google-cloud-storage>=2.10.0",
    "azure-storage-blob>=12.19.0",
    "chromadb>=0.4.0",
    "weaviate-client>=3.25.0",
    "fastapi>=0.109.0",
    "sentence-transformers>=2.3.0",
    "numpy>=1.24.0",
    "voyageai>=0.2.0",
]
# Project links published in the package metadata. Labels that contain
# spaces or non-ASCII characters must stay quoted; plain labels use bare keys.
[project.urls]
Homepage = "https://skillseekersweb.com/"
Website = "https://skillseekersweb.com/"
Repository = "https://github.com/yusufkaraaslan/Skill_Seekers"
"Bug Tracker" = "https://github.com/yusufkaraaslan/Skill_Seekers/issues"
Documentation = "https://skillseekersweb.com/"
"Config Browser" = "https://skillseekersweb.com/"
"中文文档 (Chinese)" = "https://github.com/yusufkaraaslan/Skill_Seekers/blob/main/README.zh-CN.md"
Author = "https://x.com/_yUSyUS_"
# Console commands created at install time. Each entry maps a command name
# to the `module:function` that the command invokes.
[project.scripts]

# Main unified CLI
skill-seekers = "skill_seekers.cli.main:main"

# Individual tool entry points (one command per CLI module)
skill-seekers-config = "skill_seekers.cli.config_command:main"
skill-seekers-resume = "skill_seekers.cli.resume_command:main"
skill-seekers-scrape = "skill_seekers.cli.doc_scraper:main"
skill-seekers-github = "skill_seekers.cli.github_scraper:main"
skill-seekers-pdf = "skill_seekers.cli.pdf_scraper:main"
skill-seekers-unified = "skill_seekers.cli.unified_scraper:main"
skill-seekers-enhance = "skill_seekers.cli.enhance_skill_local:main"
skill-seekers-enhance-status = "skill_seekers.cli.enhance_status:main"
skill-seekers-package = "skill_seekers.cli.package_skill:main"
skill-seekers-upload = "skill_seekers.cli.upload_skill:main"
skill-seekers-estimate = "skill_seekers.cli.estimate_pages:main"
skill-seekers-install = "skill_seekers.cli.install_skill:main"
skill-seekers-install-agent = "skill_seekers.cli.install_agent:main"
skill-seekers-codebase = "skill_seekers.cli.codebase_scraper:main"
skill-seekers-patterns = "skill_seekers.cli.pattern_recognizer:main"
skill-seekers-how-to-guides = "skill_seekers.cli.how_to_guide_builder:main"
skill-seekers-setup = "skill_seekers.cli.setup_wizard:main"
skill-seekers-cloud = "skill_seekers.cli.cloud_storage_cli:main"
# The only entry point that lives outside the `cli` package.
skill-seekers-embed = "skill_seekers.embedding.server:main"
skill-seekers-sync = "skill_seekers.cli.sync_cli:main"
skill-seekers-benchmark = "skill_seekers.cli.benchmark_cli:main"
skill-seekers-stream = "skill_seekers.cli.streaming_ingest:main"
skill-seekers-update = "skill_seekers.cli.incremental_updater:main"
skill-seekers-multilang = "skill_seekers.cli.multilang_support:main"
skill-seekers-quality = "skill_seekers.cli.quality_metrics:main"
# setuptools packaging: a "src" layout where the root package directory
# is mapped to ./src.
[tool.setuptools]
package-dir = { "" = "src" }

# Package discovery is limited to src/ and to packages whose names start
# with "skill_seekers"; namespace packages are not collected.
[tool.setuptools.packages.find]
where = ["src"]
include = ["skill_seekers*"]
namespaces = false

# Non-Python files shipped inside the skill_seekers package.
[tool.setuptools.package-data]
skill_seekers = [
    "py.typed",
]
# pytest configuration.
[tool.pytest.ini_options]
# Collection: where tests live and which names count as tests.
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# --strict-markers makes any marker missing from `markers` below an error,
# so new markers must be registered here before use.
addopts = "-v --tb=short --strict-markers"
markers = [
    "asyncio: mark test as an async test",
    "slow: mark test as slow running (>5 seconds)",
    "integration: mark test as integration test (requires external services)",
    "e2e: mark test as end-to-end (resource-intensive, may create files)",
    "venv: mark test as requiring virtual environment setup",
    "bootstrap: mark test as bootstrap feature specific",
    "benchmark: mark test as performance benchmark",
]

# pytest-asyncio settings.
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
# coverage.py: what gets measured.
[tool.coverage.run]
source = ["src/skill_seekers"]
omit = [
    "*/tests/*",
    "*/__pycache__/*",
    "*/venv/*",
]

# coverage.py: lines matching any of these regular expressions are left
# out of the report.
[tool.coverage.report]
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise AssertionError",
    "raise NotImplementedError",
    # The dots are regex wildcards, so either quote style around __main__ matches.
    "if __name__ == .__main__.:",
    "if TYPE_CHECKING:",
    "@abstractmethod",
]
# Ruff: shared settings for the linter and the formatter.
[tool.ruff]
line-length = 100
target-version = "py310"
src = ["src", "tests"]

[tool.ruff.lint]
# Rule families that are switched on.
select = [
    "E",    # pycodestyle errors
    "W",    # pycodestyle warnings
    "F",    # Pyflakes
    "I",    # isort
    "B",    # flake8-bugbear
    "C4",   # flake8-comprehensions
    "UP",   # pyupgrade
    "ARG",  # flake8-unused-arguments
    "SIM",  # flake8-simplify
]
# Individual rules switched off again, each with its reason.
ignore = [
    "E501",    # line too long (handled by formatter)
    "F541",    # f-string without placeholders (style preference)
    "ARG002",  # unused method argument (often needed for interface compliance)
    "B007",    # loop control variable not used (sometimes intentional)
    # import block unsorted.
    # NOTE(review): `ruff format` does not sort imports, so with I001 ignored
    # nothing in this configuration enforces import order - confirm intended.
    "I001",
    "SIM114",  # combine if branches (style preference, can reduce readability)
]

[tool.ruff.lint.isort]
known-first-party = ["skill_seekers"]
# mypy: project-wide type-checking settings.
[tool.mypy]
python_version = "3.10"

# Warnings
warn_return_any = true
warn_unused_configs = true

# Strictness: unannotated functions are allowed, but their bodies are
# still type-checked.
disallow_untyped_defs = false
disallow_incomplete_defs = false
check_untyped_defs = true

# Imports without type information are not reported.
ignore_missing_imports = true

# Output formatting
show_error_codes = true
pretty = true

# Test modules: the bodies of untyped functions are not checked.
[[tool.mypy.overrides]]
module = "tests.*"
disallow_untyped_defs = false
check_untyped_defs = false
# Development-only dependency groups. These are kept separate from
# [project.optional-dependencies], so they are not exposed as extras.
[dependency-groups]
dev = [
    # Test runner and plugins
    "pytest>=8.4.2",
    "pytest-asyncio>=0.24.0",
    "pytest-cov>=7.0.0",
    "coverage>=7.11.0",
    # Lint / format / type-check
    "ruff>=0.14.13",
    "mypy>=1.19.1",
]