diff --git a/src/skill_seekers/benchmark/framework.py b/src/skill_seekers/benchmark/framework.py index 448b80d..ba1fb89 100644 --- a/src/skill_seekers/benchmark/framework.py +++ b/src/skill_seekers/benchmark/framework.py @@ -7,7 +7,8 @@ import psutil import functools from contextlib import contextmanager from datetime import datetime -from typing import List, Dict, Any, Optional, Callable +from typing import Any +from collections.abc import Callable from pathlib import Path from .models import ( @@ -38,13 +39,13 @@ class BenchmarkResult: """ self.name = name self.started_at = datetime.utcnow() - self.finished_at: Optional[datetime] = None + self.finished_at: datetime | None = None - self.timings: List[TimingResult] = [] - self.memory: List[MemoryUsage] = [] - self.metrics: List[Metric] = [] - self.system_info: Dict[str, Any] = {} - self.recommendations: List[str] = [] + self.timings: list[TimingResult] = [] + self.memory: list[MemoryUsage] = [] + self.metrics: list[Metric] = [] + self.system_info: dict[str, Any] = {} + self.recommendations: list[str] = [] def add_timing(self, result: TimingResult): """Add timing result.""" @@ -209,7 +210,7 @@ class Benchmark: self, func: Callable, *args, - operation: Optional[str] = None, + operation: str | None = None, track_memory: bool = False, **kwargs ) -> Any: @@ -237,14 +238,13 @@ class Benchmark: op_name = operation or func.__name__ if track_memory: - with self.memory(op_name): - with self.timer(op_name): - return func(*args, **kwargs) + with self.memory(op_name), self.timer(op_name): + return func(*args, **kwargs) else: with self.timer(op_name): return func(*args, **kwargs) - def timed(self, operation: Optional[str] = None, track_memory: bool = False): + def timed(self, operation: str | None = None, track_memory: bool = False): """ Decorator for timing functions. diff --git a/src/skill_seekers/benchmark/models.py b/src/skill_seekers/benchmark/models.py index 6e49940..107d100 100644 --- a/src/skill_seekers/benchmark/models.py +++ b/src/skill_seekers/benchmark/models.py @@ -2,7 +2,7 @@ Pydantic models for benchmarking. """ -from typing import List, Dict, Optional, Any +from typing import Any from datetime import datetime from pydantic import BaseModel, Field @@ -26,8 +26,8 @@ class TimingResult(BaseModel): duration: float = Field(..., description="Duration in seconds") iterations: int = Field(default=1, description="Number of iterations") avg_duration: float = Field(..., description="Average duration per iteration") - min_duration: Optional[float] = Field(None, description="Minimum duration") - max_duration: Optional[float] = Field(None, description="Maximum duration") + min_duration: float | None = Field(None, description="Minimum duration") + max_duration: float | None = Field(None, description="Maximum duration") class MemoryUsage(BaseModel): @@ -48,24 +48,24 @@ class BenchmarkReport(BaseModel): finished_at: datetime = Field(..., description="Finish time") total_duration: float = Field(..., description="Total duration in seconds") - timings: List[TimingResult] = Field( + timings: list[TimingResult] = Field( default_factory=list, description="Timing results" ) - memory: List[MemoryUsage] = Field( + memory: list[MemoryUsage] = Field( default_factory=list, description="Memory usage results" ) - metrics: List[Metric] = Field( + metrics: list[Metric] = Field( default_factory=list, description="Additional metrics" ) - system_info: Dict[str, Any] = Field( + system_info: dict[str, Any] = Field( default_factory=dict, description="System information" ) - recommendations: List[str] = Field( + recommendations: list[str] = Field( default_factory=list, description="Optimization recommendations" ) @@ -89,11 +89,11 @@ class ComparisonReport(BaseModel): baseline: BenchmarkReport = Field(..., description="Baseline benchmark") current: BenchmarkReport = Field(..., description="Current benchmark") - improvements: List[str] = Field( + improvements: list[str] = Field( default_factory=list, description="Performance improvements" ) - regressions: List[str] = Field( + regressions: list[str] = Field( default_factory=list, description="Performance regressions" ) diff --git a/src/skill_seekers/benchmark/runner.py b/src/skill_seekers/benchmark/runner.py index 3f238cb..1233453 100644 --- a/src/skill_seekers/benchmark/runner.py +++ b/src/skill_seekers/benchmark/runner.py @@ -4,7 +4,8 @@ Benchmark execution and orchestration. import json from pathlib import Path -from typing import List, Dict, Any, Optional, Callable +from typing import Any +from collections.abc import Callable from datetime import datetime from .framework import Benchmark @@ -34,7 +35,7 @@ class BenchmarkRunner: }) """ - def __init__(self, output_dir: Optional[Path] = None): + def __init__(self, output_dir: Path | None = None): """ Initialize runner. @@ -91,9 +92,9 @@ class BenchmarkRunner: def run_suite( self, - benchmarks: Dict[str, Callable[[Benchmark], None]], + benchmarks: dict[str, Callable[[Benchmark], None]], save: bool = True - ) -> Dict[str, BenchmarkReport]: + ) -> dict[str, BenchmarkReport]: """ Run multiple benchmarks. @@ -217,7 +218,7 @@ class BenchmarkRunner: memory_change_mb=memory_change_mb ) - def list_benchmarks(self) -> List[Dict[str, Any]]: + def list_benchmarks(self) -> list[dict[str, Any]]: """ List saved benchmarks. @@ -252,7 +253,7 @@ class BenchmarkRunner: return benchmarks - def get_latest(self, name: str) -> Optional[Path]: + def get_latest(self, name: str) -> Path | None: """ Get path to latest benchmark with given name. @@ -292,7 +293,7 @@ class BenchmarkRunner: runner.cleanup_old(keep_latest=3) """ # Group by benchmark name - by_name: Dict[str, List[Path]] = {} + by_name: dict[str, list[Path]] = {} for path in self.output_dir.glob("*.json"): # Extract name from filename (name_timestamp.json) diff --git a/src/skill_seekers/cli/adaptors/base.py b/src/skill_seekers/cli/adaptors/base.py index ba79806..4b8246e 100644 --- a/src/skill_seekers/cli/adaptors/base.py +++ b/src/skill_seekers/cli/adaptors/base.py @@ -9,7 +9,7 @@ This enables Skill Seekers to generate skills for multiple LLM platforms (Claude from abc import ABC, abstractmethod from dataclasses import dataclass, field from pathlib import Path -from typing import Any, List, Tuple +from typing import Any @dataclass @@ -283,7 +283,7 @@ class SkillAdaptor(ABC): chunk_max_tokens: int = 512, preserve_code_blocks: bool = True, source_file: str = None - ) -> List[Tuple[str, dict]]: + ) -> list[tuple[str, dict]]: """ Optionally chunk content for RAG platforms. diff --git a/src/skill_seekers/cli/adaptors/chroma.py b/src/skill_seekers/cli/adaptors/chroma.py index 2adafed..9f332d4 100644 --- a/src/skill_seekers/cli/adaptors/chroma.py +++ b/src/skill_seekers/cli/adaptors/chroma.py @@ -256,10 +256,9 @@ class ChromaAdaptor(SkillAdaptor): # Parse URL if '://' in chroma_url: parts = chroma_url.split('://') - protocol = parts[0] + parts[0] host_port = parts[1] else: - protocol = 'http' host_port = chroma_url if ':' in host_port: diff --git a/src/skill_seekers/cli/adaptors/faiss_helpers.py b/src/skill_seekers/cli/adaptors/faiss_helpers.py index b8b5fa1..2a4480a 100644 --- a/src/skill_seekers/cli/adaptors/faiss_helpers.py +++ b/src/skill_seekers/cli/adaptors/faiss_helpers.py @@ -236,7 +236,7 @@ class FAISSHelpers(SkillAdaptor): Returns: Result with usage instructions """ - example_code = """ + example_code = f""" # Example: Create FAISS index with JSON metadata (safe & portable) import faiss @@ -246,7 +246,7 @@ from openai import OpenAI from pathlib import Path # Load data -with open("{path}") as f: +with open("{package_path.name}") as f: data = json.load(f) # Generate embeddings (using OpenAI) @@ -387,9 +387,7 @@ print(f"\\nIndex stats:") print(f" Total vectors: {{index.ntotal}}") print(f" Dimension: {{dimension}}") print(f" Type: {{type(index).__name__}}") -""".format( - path=package_path.name - ) +""" return { "success": False, diff --git a/src/skill_seekers/cli/adaptors/haystack.py b/src/skill_seekers/cli/adaptors/haystack.py index 1a69f79..eb9541f 100644 --- a/src/skill_seekers/cli/adaptors/haystack.py +++ b/src/skill_seekers/cli/adaptors/haystack.py @@ -225,7 +225,7 @@ class HaystackAdaptor(SkillAdaptor): Returns: Result indicating no upload capability """ - example_code = """ + example_code = f""" # Example: Load into Haystack 2.x from haystack import Document @@ -234,7 +234,7 @@ from haystack.components.retrievers.in_memory import InMemoryBM25Retriever import json # Load documents -with open("{path}") as f: +with open("{package_path.name}") as f: docs_data = json.load(f) # Convert to Haystack Documents @@ -254,9 +254,7 @@ retriever = InMemoryBM25Retriever(document_store=document_store) results = retriever.run(query="your question here") for doc in results["documents"]: print(doc.content) -""".format( - path=package_path.name - ) +""" return { "success": False, diff --git a/src/skill_seekers/cli/adaptors/langchain.py b/src/skill_seekers/cli/adaptors/langchain.py index d41aebe..4481384 100644 --- a/src/skill_seekers/cli/adaptors/langchain.py +++ b/src/skill_seekers/cli/adaptors/langchain.py @@ -222,14 +222,14 @@ class LangChainAdaptor(SkillAdaptor): Returns: Result indicating no upload capability """ - example_code = """ + example_code = f""" # Example: Load into LangChain from langchain.schema import Document import json # Load documents -with open("{path}") as f: +with open("{package_path.name}") as f: docs_data = json.load(f) # Convert to LangChain Documents @@ -247,9 +247,7 @@ retriever = vectorstore.as_retriever() # Query results = retriever.get_relevant_documents("your query here") -""".format( - path=package_path.name - ) +""" return { "success": False, diff --git a/src/skill_seekers/cli/adaptors/llama_index.py b/src/skill_seekers/cli/adaptors/llama_index.py index 994985f..f4a0637 100644 --- a/src/skill_seekers/cli/adaptors/llama_index.py +++ b/src/skill_seekers/cli/adaptors/llama_index.py @@ -245,7 +245,7 @@ class LlamaIndexAdaptor(SkillAdaptor): Returns: Result indicating no upload capability """ - example_code = """ + example_code = f""" # Example: Load into LlamaIndex from llama_index.core.schema import TextNode @@ -253,7 +253,7 @@ from llama_index.core import VectorStoreIndex import json # Load nodes -with open("{path}") as f: +with open("{package_path.name}") as f: nodes_data = json.load(f) # Convert to LlamaIndex Nodes @@ -275,9 +275,7 @@ query_engine = index.as_query_engine() # Query response = query_engine.query("your question here") print(response) -""".format( - path=package_path.name - ) +""" return { "success": False, diff --git a/src/skill_seekers/cli/adaptors/qdrant.py b/src/skill_seekers/cli/adaptors/qdrant.py index ac7dab5..b74815e 100644 --- a/src/skill_seekers/cli/adaptors/qdrant.py +++ b/src/skill_seekers/cli/adaptors/qdrant.py @@ -261,7 +261,7 @@ class QdrantAdaptor(SkillAdaptor): Returns: Result with usage instructions """ - example_code = """ + example_code = f""" # Example: Create Qdrant collection and upload points from qdrant_client import QdrantClient @@ -271,7 +271,7 @@ from pathlib import Path from openai import OpenAI # Load data -with open("{path}") as f: +with open("{package_path.name}") as f: data = json.load(f) # Connect to Qdrant (local or cloud) @@ -438,7 +438,7 @@ similar = client.recommend( negative=["point-id-2"], # But not this limit=5 ) -""".format(path=package_path.name) +""" return { "success": False, diff --git a/src/skill_seekers/cli/adaptors/streaming_adaptor.py b/src/skill_seekers/cli/adaptors/streaming_adaptor.py index 8629b5e..164a3ed 100644 --- a/src/skill_seekers/cli/adaptors/streaming_adaptor.py +++ b/src/skill_seekers/cli/adaptors/streaming_adaptor.py @@ -8,7 +8,7 @@ Enables memory-efficient processing of large documentation sets. import json from pathlib import Path -from typing import Any, Iterator, Optional +from typing import Any import sys # Add parent directory to path for imports @@ -36,7 +36,7 @@ class StreamingAdaptorMixin: chunk_size: int = 4000, chunk_overlap: int = 200, batch_size: int = 100, - progress_callback: Optional[callable] = None + progress_callback: callable | None = None ) -> Path: """ Package skill using streaming ingestion. @@ -179,7 +179,7 @@ class StreamingAdaptorMixin: Estimation statistics """ skill_dir = Path(skill_dir) - ingester = StreamingIngester( + StreamingIngester( chunk_size=chunk_size, chunk_overlap=chunk_overlap ) diff --git a/src/skill_seekers/cli/benchmark_cli.py b/src/skill_seekers/cli/benchmark_cli.py index 31a1ad1..59927fd 100644 --- a/src/skill_seekers/cli/benchmark_cli.py +++ b/src/skill_seekers/cli/benchmark_cli.py @@ -42,17 +42,15 @@ def run_scraping_benchmark(runner, config): scrape_config_path = config.get("scrape_config") # Time scraping - with bench.timer("scrape_docs"): - with bench.memory("scrape_docs"): - pages = scrape_all(scrape_config_path) + with bench.timer("scrape_docs"), bench.memory("scrape_docs"): + pages = scrape_all(scrape_config_path) # Track metrics bench.metric("pages_scraped", len(pages), "pages") # Time building - with bench.timer("build_skill"): - with bench.memory("build_skill"): - build_skill(scrape_config_path, pages) + with bench.timer("build_skill"), bench.memory("build_skill"): + build_skill(scrape_config_path, pages) name = config.get("name", "scraping-benchmark") report = runner.run(name, benchmark_func) @@ -76,9 +74,8 @@ def run_embedding_benchmark(runner, config): # Batch embedding if len(texts) > 1: - with bench.timer("batch_embedding"): - with bench.memory("batch_embedding"): - embeddings = generator.generate_batch(texts, model=model) + with bench.timer("batch_embedding"), bench.memory("batch_embedding"): + embeddings = generator.generate_batch(texts, model=model) bench.metric("embeddings_per_sec", len(embeddings) / bench.result.timings[-1].duration, "emb/sec") diff --git a/src/skill_seekers/cli/cloud_storage_cli.py b/src/skill_seekers/cli/cloud_storage_cli.py index d9fd212..8f25cb3 100644 --- a/src/skill_seekers/cli/cloud_storage_cli.py +++ b/src/skill_seekers/cli/cloud_storage_cli.py @@ -8,7 +8,6 @@ Upload, download, and manage skills in cloud storage (S3, GCS, Azure). import sys import argparse from pathlib import Path -from typing import Optional from .storage import get_storage_adaptor @@ -155,7 +154,7 @@ def format_size(size_bytes: int) -> str: return f"{size_bytes:.1f}PB" -def parse_extra_args(extra: Optional[list]) -> dict: +def parse_extra_args(extra: list | None) -> dict: """Parse extra arguments into dictionary.""" if not extra: return {} diff --git a/src/skill_seekers/cli/embedding_pipeline.py b/src/skill_seekers/cli/embedding_pipeline.py index 72521f4..cc75415 100644 --- a/src/skill_seekers/cli/embedding_pipeline.py +++ b/src/skill_seekers/cli/embedding_pipeline.py @@ -10,7 +10,7 @@ import hashlib import json import time from pathlib import Path -from typing import List, Optional, Dict, Any, Tuple +from typing import Any from dataclasses import dataclass, field from abc import ABC, abstractmethod import numpy as np @@ -23,7 +23,7 @@ class EmbeddingConfig: model: str dimension: int batch_size: int = 100 - cache_dir: Optional[Path] = None + cache_dir: Path | None = None max_retries: int = 3 retry_delay: float = 1.0 @@ -31,8 +31,8 @@ class EmbeddingConfig: @dataclass class EmbeddingResult: """Result of embedding generation.""" - embeddings: List[List[float]] - metadata: Dict[str, Any] = field(default_factory=dict) + embeddings: list[list[float]] + metadata: dict[str, Any] = field(default_factory=dict) cached_count: int = 0 generated_count: int = 0 total_time: float = 0.0 @@ -59,7 +59,7 @@ class CostTracker: else: self.cache_misses += 1 - def get_stats(self) -> Dict[str, Any]: + def get_stats(self) -> dict[str, Any]: """Get statistics.""" cache_rate = (self.cache_hits / self.total_requests * 100) if self.total_requests > 0 else 0 @@ -77,7 +77,7 @@ class EmbeddingProvider(ABC): """Abstract base class for embedding providers.""" @abstractmethod - def generate_embeddings(self, texts: List[str]) -> List[List[float]]: + def generate_embeddings(self, texts: list[str]) -> list[list[float]]: """Generate embeddings for texts.""" pass @@ -108,7 +108,7 @@ class OpenAIEmbeddingProvider(EmbeddingProvider): 'text-embedding-3-large': 3072, } - def __init__(self, model: str = 'text-embedding-ada-002', api_key: Optional[str] = None): + def __init__(self, model: str = 'text-embedding-ada-002', api_key: str | None = None): """Initialize OpenAI provider.""" self.model = model self.api_key = api_key @@ -124,7 +124,7 @@ class OpenAIEmbeddingProvider(EmbeddingProvider): raise ImportError("OpenAI package not installed. Install with: pip install openai") return self._client - def generate_embeddings(self, texts: List[str]) -> List[List[float]]: + def generate_embeddings(self, texts: list[str]) -> list[list[float]]: """Generate embeddings using OpenAI.""" client = self._get_client() @@ -155,7 +155,7 @@ class LocalEmbeddingProvider(EmbeddingProvider): """Initialize local provider.""" self.dimension = dimension - def generate_embeddings(self, texts: List[str]) -> List[List[float]]: + def generate_embeddings(self, texts: list[str]) -> list[list[float]]: """Generate embeddings using local model (simulated).""" # In production, would use sentence-transformers or similar embeddings = [] @@ -180,10 +180,10 @@ class LocalEmbeddingProvider(EmbeddingProvider): class EmbeddingCache: """Cache for embeddings to avoid recomputation.""" - def __init__(self, cache_dir: Optional[Path] = None): + def __init__(self, cache_dir: Path | None = None): """Initialize cache.""" self.cache_dir = Path(cache_dir) if cache_dir else None - self._memory_cache: Dict[str, List[float]] = {} + self._memory_cache: dict[str, list[float]] = {} if self.cache_dir: self.cache_dir.mkdir(parents=True, exist_ok=True) @@ -193,7 +193,7 @@ class EmbeddingCache: key = f"{model}:{text}" return hashlib.sha256(key.encode()).hexdigest() - def get(self, text: str, model: str) -> Optional[List[float]]: + def get(self, text: str, model: str) -> list[float] | None: """Get embedding from cache.""" cache_key = self._compute_hash(text, model) @@ -215,7 +215,7 @@ class EmbeddingCache: return None - def set(self, text: str, model: str, embedding: List[float]) -> None: + def set(self, text: str, model: str, embedding: list[float]) -> None: """Store embedding in cache.""" cache_key = self._compute_hash(text, model) @@ -266,7 +266,7 @@ class EmbeddingPipeline: def generate_batch( self, - texts: List[str], + texts: list[str], show_progress: bool = True ) -> EmbeddingResult: """ @@ -313,7 +313,7 @@ class EmbeddingPipeline: new_embeddings = self.provider.generate_embeddings(to_generate) # Store in cache - for text, embedding in zip(to_generate, new_embeddings): + for text, embedding in zip(to_generate, new_embeddings, strict=False): self.cache.set(text, self.config.model, embedding) # Track cost @@ -322,7 +322,7 @@ class EmbeddingPipeline: self.cost_tracker.add_request(total_tokens, cost, from_cache=False) # Merge with cached - for idx, embedding in zip(to_generate_indices, new_embeddings): + for idx, embedding in zip(to_generate_indices, new_embeddings, strict=False): batch_embeddings.insert(idx, embedding) generated_count += len(to_generate) @@ -359,7 +359,7 @@ class EmbeddingPipeline: cost_estimate=self.cost_tracker.estimated_cost ) - def validate_dimensions(self, embeddings: List[List[float]]) -> bool: + def validate_dimensions(self, embeddings: list[list[float]]) -> bool: """ Validate embedding dimensions. @@ -379,7 +379,7 @@ class EmbeddingPipeline: return True - def get_cost_stats(self) -> Dict[str, Any]: + def get_cost_stats(self) -> dict[str, Any]: """Get cost tracking statistics.""" return self.cost_tracker.get_stats() diff --git a/src/skill_seekers/cli/incremental_updater.py b/src/skill_seekers/cli/incremental_updater.py index bcf2a36..a6b3ff3 100644 --- a/src/skill_seekers/cli/incremental_updater.py +++ b/src/skill_seekers/cli/incremental_updater.py @@ -9,10 +9,8 @@ Tracks document versions and generates delta packages. import json import hashlib from pathlib import Path -from typing import Optional, Dict, List, Set from dataclasses import dataclass, asdict from datetime import datetime -import difflib @dataclass @@ -28,10 +26,10 @@ class DocumentVersion: @dataclass class ChangeSet: """Set of changes detected.""" - added: List[DocumentVersion] - modified: List[DocumentVersion] - deleted: List[str] - unchanged: List[DocumentVersion] + added: list[DocumentVersion] + modified: list[DocumentVersion] + deleted: list[str] + unchanged: list[DocumentVersion] @property def has_changes(self) -> bool: @@ -50,7 +48,7 @@ class UpdateMetadata: timestamp: str previous_version: str new_version: str - change_summary: Dict[str, int] + change_summary: dict[str, int] total_documents: int @@ -72,8 +70,8 @@ class IncrementalUpdater: """ self.skill_dir = Path(skill_dir) self.version_file = self.skill_dir / version_file - self.current_versions: Dict[str, DocumentVersion] = {} - self.previous_versions: Dict[str, DocumentVersion] = {} + self.current_versions: dict[str, DocumentVersion] = {} + self.previous_versions: dict[str, DocumentVersion] = {} def _compute_file_hash(self, file_path: Path) -> str: """ @@ -96,7 +94,7 @@ class IncrementalUpdater: print(f"āš ļø Warning: Failed to hash {file_path}: {e}") return "" - def _scan_documents(self) -> Dict[str, DocumentVersion]: + def _scan_documents(self) -> dict[str, DocumentVersion]: """ Scan skill directory and build version map. @@ -356,7 +354,7 @@ class IncrementalUpdater: # Read current content current_path = self.skill_dir / doc.file_path - current_content = current_path.read_text(encoding="utf-8").splitlines() + current_path.read_text(encoding="utf-8").splitlines() # Generate diff (simplified) lines.append(f" Size: {prev.size_bytes:,} → {doc.size_bytes:,} bytes") diff --git a/src/skill_seekers/cli/multilang_support.py b/src/skill_seekers/cli/multilang_support.py index f0805fc..8074ef4 100644 --- a/src/skill_seekers/cli/multilang_support.py +++ b/src/skill_seekers/cli/multilang_support.py @@ -8,9 +8,7 @@ and translation-ready format generation. import re from pathlib import Path -from typing import Dict, List, Optional, Set from dataclasses import dataclass -from collections import Counter import json @@ -20,16 +18,16 @@ class LanguageInfo: code: str # ISO 639-1 code (e.g., 'en', 'es', 'zh') name: str # Full name (e.g., 'English', 'Spanish', 'Chinese') confidence: float # Detection confidence (0.0-1.0) - script: Optional[str] = None # Script type (e.g., 'Latin', 'Cyrillic') + script: str | None = None # Script type (e.g., 'Latin', 'Cyrillic') @dataclass class TranslationStatus: """Translation status for a document.""" source_language: str - target_languages: List[str] - translated_languages: Set[str] - missing_languages: Set[str] + target_languages: list[str] + translated_languages: set[str] + missing_languages: set[str] completeness: float # Percentage (0.0-1.0) @@ -155,7 +153,7 @@ class LanguageDetector: script=self.SCRIPTS.get(best_lang) ) - def detect_from_filename(self, filename: str) -> Optional[str]: + def detect_from_filename(self, filename: str) -> str | None: """ Detect language from filename pattern. @@ -194,15 +192,15 @@ class MultiLanguageManager: def __init__(self): """Initialize multi-language manager.""" self.detector = LanguageDetector() - self.documents: Dict[str, List[Dict]] = {} # lang_code -> [docs] - self.primary_language: Optional[str] = None + self.documents: dict[str, list[dict]] = {} # lang_code -> [docs] + self.primary_language: str | None = None def add_document( self, file_path: str, content: str, - metadata: Optional[Dict] = None, - force_language: Optional[str] = None + metadata: dict | None = None, + force_language: str | None = None ) -> None: """ Add document with language detection. @@ -258,11 +256,11 @@ class MultiLanguageManager: self.documents[lang_code].append(doc) - def get_languages(self) -> List[str]: + def get_languages(self) -> list[str]: """Get list of detected languages.""" return sorted(self.documents.keys()) - def get_document_count(self, language: Optional[str] = None) -> int: + def get_document_count(self, language: str | None = None) -> int: """ Get document count for a language. @@ -276,7 +274,7 @@ class MultiLanguageManager: return len(self.documents.get(language, [])) return sum(len(docs) for docs in self.documents.values()) - def get_translation_status(self, base_language: Optional[str] = None) -> TranslationStatus: + def get_translation_status(self, base_language: str | None = None) -> TranslationStatus: """ Get translation status. @@ -320,7 +318,7 @@ class MultiLanguageManager: completeness=min(completeness, 1.0) ) - def export_by_language(self, output_dir: Path) -> Dict[str, Path]: + def export_by_language(self, output_dir: Path) -> dict[str, Path]: """ Export documents organized by language. diff --git a/src/skill_seekers/cli/presets.py b/src/skill_seekers/cli/presets.py index 2f42b72..402d31c 100644 --- a/src/skill_seekers/cli/presets.py +++ b/src/skill_seekers/cli/presets.py @@ -4,7 +4,6 @@ Provides predefined analysis configurations with clear trade-offs between speed and comprehensiveness. """ from dataclasses import dataclass -from typing import Dict, Optional @dataclass @@ -17,7 +16,7 @@ class AnalysisPreset: name: str description: str depth: str # surface, deep, full - features: Dict[str, bool] # Feature flags (api_reference, patterns, etc.) + features: dict[str, bool] # Feature flags (api_reference, patterns, etc.) enhance_level: int # 0=none, 1=SKILL.md, 2=+Arch+Config, 3=full estimated_time: str icon: str @@ -85,7 +84,7 @@ class PresetManager: """Manages analysis presets and applies them to CLI arguments.""" @staticmethod - def get_preset(name: str) -> Optional[AnalysisPreset]: + def get_preset(name: str) -> AnalysisPreset | None: """Get preset by name. Args: diff --git a/src/skill_seekers/cli/quality_metrics.py b/src/skill_seekers/cli/quality_metrics.py index 2b0f86e..915ff47 100644 --- a/src/skill_seekers/cli/quality_metrics.py +++ b/src/skill_seekers/cli/quality_metrics.py @@ -8,7 +8,7 @@ Tracks completeness, accuracy, coverage, and health metrics. import json from pathlib import Path -from typing import Dict, List, Optional, Any +from typing import Any from dataclasses import dataclass, field, asdict from datetime import datetime from enum import Enum @@ -29,7 +29,7 @@ class QualityMetric: value: float # 0.0-1.0 (or 0-100 percentage) level: MetricLevel description: str - suggestions: List[str] = field(default_factory=list) + suggestions: list[str] = field(default_factory=list) @dataclass @@ -49,10 +49,10 @@ class QualityReport: timestamp: str skill_name: str overall_score: QualityScore - metrics: List[QualityMetric] - statistics: Dict[str, Any] - recommendations: List[str] - history: List[Dict[str, Any]] = field(default_factory=list) + metrics: list[QualityMetric] + statistics: dict[str, Any] + recommendations: list[str] + history: list[dict[str, Any]] = field(default_factory=list) class QualityAnalyzer: @@ -73,8 +73,8 @@ class QualityAnalyzer: def __init__(self, skill_dir: Path): """Initialize quality analyzer.""" self.skill_dir = Path(skill_dir) - self.metrics: List[QualityMetric] = [] - self.statistics: Dict[str, Any] = {} + self.metrics: list[QualityMetric] = [] + self.statistics: dict[str, Any] = {} def analyze_completeness(self) -> float: """ @@ -192,9 +192,8 @@ class QualityAnalyzer: level = MetricLevel.INFO if accuracy >= 80 else MetricLevel.WARNING suggestions = [] - if accuracy < 100: - if issues: - suggestions.extend(issues[:3]) # Top 3 issues + if accuracy < 100 and issues: + suggestions.extend(issues[:3]) # Top 3 issues self.metrics.append(QualityMetric( name="Accuracy", @@ -319,7 +318,7 @@ class QualityAnalyzer: return health - def calculate_statistics(self) -> Dict[str, Any]: + def calculate_statistics(self) -> dict[str, Any]: """Calculate skill statistics.""" stats = { 'total_files': 0, @@ -392,7 +391,7 @@ class QualityAnalyzer: grade=grade ) - def generate_recommendations(self, score: QualityScore) -> List[str]: + def generate_recommendations(self, score: QualityScore) -> list[str]: """Generate improvement recommendations.""" recommendations = [] @@ -545,10 +544,7 @@ def main(): print(formatted) # Save report - if args.output: - report_path = Path(args.output) - else: - report_path = skill_dir / "quality_report.json" + report_path = Path(args.output) if args.output else skill_dir / "quality_report.json" report_path.write_text(json.dumps(asdict(report), indent=2, default=str)) print(f"\nāœ… Report saved: {report_path}") diff --git a/src/skill_seekers/cli/rag_chunker.py b/src/skill_seekers/cli/rag_chunker.py index 6585448..8c97097 100644 --- a/src/skill_seekers/cli/rag_chunker.py +++ b/src/skill_seekers/cli/rag_chunker.py @@ -16,7 +16,6 @@ Usage: import re from pathlib import Path -from typing import List, Dict, Tuple, Optional import json import logging @@ -78,9 +77,9 @@ class RAGChunker: def chunk_document( self, text: str, - metadata: Dict, - source_file: Optional[str] = None - ) -> List[Dict]: + metadata: dict, + source_file: str | None = None + ) -> list[dict]: """ Chunk single document into RAG-ready chunks. @@ -139,7 +138,7 @@ class RAGChunker: return result - def chunk_skill(self, skill_dir: Path) -> List[Dict]: + def chunk_skill(self, skill_dir: Path) -> list[dict]: """ Chunk entire skill directory. @@ -154,7 +153,7 @@ class RAGChunker: # Chunk main SKILL.md skill_md = skill_dir / "SKILL.md" if skill_md.exists(): - with open(skill_md, 'r', encoding='utf-8') as f: + with open(skill_md, encoding='utf-8') as f: content = f.read() metadata = { @@ -170,7 +169,7 @@ class RAGChunker: references_dir = skill_dir / "references" if references_dir.exists(): for ref_file in references_dir.glob("*.md"): - with open(ref_file, 'r', encoding='utf-8') as f: + with open(ref_file, encoding='utf-8') as f: content = f.read() metadata = { @@ -193,7 +192,7 @@ class RAGChunker: return all_chunks - def _extract_code_blocks(self, text: str) -> Tuple[str, List[Dict]]: + def _extract_code_blocks(self, text: str) -> tuple[str, list[dict]]: """ Extract code blocks and replace with placeholders. @@ -231,9 +230,9 @@ class RAGChunker: def _reinsert_code_blocks( self, - chunks: List[str], - code_blocks: List[Dict] - ) -> List[str]: + chunks: list[str], + code_blocks: list[dict] + ) -> list[str]: """ Re-insert code blocks into chunks. @@ -255,7 +254,7 @@ class RAGChunker: return result - def _find_semantic_boundaries(self, text: str) -> List[int]: + def _find_semantic_boundaries(self, text: str) -> list[int]: """ Find paragraph and section boundaries. @@ -303,7 +302,7 @@ class RAGChunker: return boundaries - def _split_with_overlap(self, text: str, boundaries: List[int]) -> List[str]: + def _split_with_overlap(self, text: str, boundaries: list[int]) -> list[str]: """ Split text at semantic boundaries with overlap. @@ -375,7 +374,7 @@ class RAGChunker: return chunks - def save_chunks(self, chunks: List[Dict], output_path: Path) -> None: + def save_chunks(self, chunks: list[dict], output_path: Path) -> None: """ Save chunks to JSON file. diff --git a/src/skill_seekers/cli/storage/azure_storage.py b/src/skill_seekers/cli/storage/azure_storage.py index 2b26ac8..a890a81 100644 --- a/src/skill_seekers/cli/storage/azure_storage.py +++ b/src/skill_seekers/cli/storage/azure_storage.py @@ -4,7 +4,6 @@ Azure Blob Storage adaptor implementation. import os from pathlib import Path -from typing import List, Dict, Optional from datetime import datetime, timedelta try: @@ -118,7 +117,7 @@ class AzureStorageAdaptor(BaseStorageAdaptor): ) def upload_file( - self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None + self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None ) -> str: """Upload file to Azure Blob Storage.""" local_file = Path(local_path) @@ -167,7 +166,7 @@ class AzureStorageAdaptor(BaseStorageAdaptor): def list_files( self, prefix: str = "", max_results: int = 1000 - ) -> List[StorageObject]: + ) -> list[StorageObject]: """List files in Azure container.""" try: blobs = self.container_client.list_blobs( diff --git a/src/skill_seekers/cli/storage/base_storage.py b/src/skill_seekers/cli/storage/base_storage.py index 9824ca1..dd64c8b 100644 --- a/src/skill_seekers/cli/storage/base_storage.py +++ b/src/skill_seekers/cli/storage/base_storage.py @@ -4,7 +4,6 @@ Base storage adaptor interface for cloud storage providers. from abc import ABC, abstractmethod from pathlib import Path -from typing import List, Dict, Optional from dataclasses import dataclass @@ -23,9 +22,9 @@ class StorageObject: key: str size: int - last_modified: Optional[str] = None - etag: Optional[str] = None - metadata: Optional[Dict[str, str]] = None + last_modified: str | None = None + etag: str | None = None + metadata: dict[str, str] | None = None class BaseStorageAdaptor(ABC): @@ -47,7 +46,7 @@ class BaseStorageAdaptor(ABC): @abstractmethod def upload_file( - self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None + self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None ) -> str: """ Upload file to cloud storage. @@ -98,7 +97,7 @@ class BaseStorageAdaptor(ABC): @abstractmethod def list_files( self, prefix: str = "", max_results: int = 1000 - ) -> List[StorageObject]: + ) -> list[StorageObject]: """ List files in cloud storage. @@ -146,8 +145,8 @@ class BaseStorageAdaptor(ABC): pass def upload_directory( - self, local_dir: str, remote_prefix: str = "", exclude_patterns: Optional[List[str]] = None - ) -> List[str]: + self, local_dir: str, remote_prefix: str = "", exclude_patterns: list[str] | None = None + ) -> list[str]: """ Upload entire directory to cloud storage. @@ -194,7 +193,7 @@ class BaseStorageAdaptor(ABC): def download_directory( self, remote_prefix: str, local_dir: str - ) -> List[str]: + ) -> list[str]: """ Download directory from cloud storage. diff --git a/src/skill_seekers/cli/storage/gcs_storage.py b/src/skill_seekers/cli/storage/gcs_storage.py index efc2789..558a8d0 100644 --- a/src/skill_seekers/cli/storage/gcs_storage.py +++ b/src/skill_seekers/cli/storage/gcs_storage.py @@ -4,7 +4,6 @@ Google Cloud Storage (GCS) adaptor implementation. import os from pathlib import Path -from typing import List, Dict, Optional from datetime import timedelta try: @@ -82,7 +81,7 @@ class GCSStorageAdaptor(BaseStorageAdaptor): self.bucket = self.storage_client.bucket(self.bucket_name) def upload_file( - self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None + self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None ) -> str: """Upload file to GCS.""" local_file = Path(local_path) @@ -125,7 +124,7 @@ class GCSStorageAdaptor(BaseStorageAdaptor): def list_files( self, prefix: str = "", max_results: int = 1000 - ) -> List[StorageObject]: + ) -> list[StorageObject]: """List files in GCS bucket.""" try: blobs = self.storage_client.list_blobs( diff --git a/src/skill_seekers/cli/storage/s3_storage.py b/src/skill_seekers/cli/storage/s3_storage.py index 6a30dac..d88e72f 100644 --- a/src/skill_seekers/cli/storage/s3_storage.py +++ b/src/skill_seekers/cli/storage/s3_storage.py @@ -4,7 +4,6 @@ AWS S3 storage adaptor implementation. import os from pathlib import Path -from typing import List, Dict, Optional try: import boto3 @@ -93,7 +92,7 @@ class S3StorageAdaptor(BaseStorageAdaptor): self.s3_resource = boto3.resource('s3', **client_kwargs) def upload_file( - self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None + self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None ) -> str: """Upload file to S3.""" local_file = Path(local_path) @@ -143,7 +142,7 @@ class S3StorageAdaptor(BaseStorageAdaptor): def list_files( self, prefix: str = "", max_results: int = 1000 - ) -> List[StorageObject]: + ) -> list[StorageObject]: """List files in S3 bucket.""" try: paginator = self.s3_client.get_paginator('list_objects_v2') diff --git a/src/skill_seekers/cli/streaming_ingest.py b/src/skill_seekers/cli/streaming_ingest.py index 65baa2c..a673de3 100644 --- a/src/skill_seekers/cli/streaming_ingest.py +++ b/src/skill_seekers/cli/streaming_ingest.py @@ -9,7 +9,7 @@ skill documentation. Handles chunking, progress tracking, and resume functionali import json import hashlib from pathlib import Path -from typing import Any, Iterator, Optional +from collections.abc import Iterator from dataclasses import dataclass import time @@ -102,8 +102,8 @@ class StreamingIngester: self, content: str, metadata: dict, - chunk_size: Optional[int] = None, - chunk_overlap: Optional[int] = None + chunk_size: int | None = None, + chunk_overlap: int | None = None ) -> Iterator[tuple[str, ChunkMetadata]]: """ Split document into overlapping chunks. @@ -180,7 +180,7 @@ class StreamingIngester: def stream_skill_directory( self, skill_dir: Path, - callback: Optional[callable] = None + callback: callable | None = None ) -> Iterator[tuple[str, dict]]: """ Stream all documents from skill directory. @@ -276,7 +276,7 @@ class StreamingIngester: def batch_iterator( self, chunks: Iterator[tuple[str, dict]], - batch_size: Optional[int] = None + batch_size: int | None = None ) -> Iterator[list[tuple[str, dict]]]: """ Group chunks into batches for efficient processing. @@ -328,7 +328,7 @@ class StreamingIngester: checkpoint_path.write_text(json.dumps(checkpoint_data, indent=2)) - def load_checkpoint(self, checkpoint_path: Path) -> Optional[dict]: + def load_checkpoint(self, checkpoint_path: Path) -> dict | None: """ Load ingestion checkpoint for resume. diff --git a/src/skill_seekers/embedding/cache.py b/src/skill_seekers/embedding/cache.py index 001f196..fcb4bda 100644 --- a/src/skill_seekers/embedding/cache.py +++ b/src/skill_seekers/embedding/cache.py @@ -5,7 +5,6 @@ Caching layer for embeddings. import json import sqlite3 from pathlib import Path -from typing import List, Optional, Tuple from datetime import datetime, timedelta @@ -78,7 +77,7 @@ class EmbeddingCache: def set( self, hash_key: str, - embedding: List[float], + embedding: list[float], model: str ) -> None: """ @@ -103,7 +102,7 @@ class EmbeddingCache: self.conn.commit() - def get(self, hash_key: str) -> Optional[List[float]]: + def get(self, hash_key: str) -> list[float] | None: """ Retrieve embedding from cache. @@ -146,7 +145,7 @@ class EmbeddingCache: return json.loads(embedding_json) - def get_batch(self, hash_keys: List[str]) -> Tuple[List[Optional[List[float]]], List[bool]]: + def get_batch(self, hash_keys: list[str]) -> tuple[list[list[float] | None], list[bool]]: """ Retrieve multiple embeddings from cache. @@ -214,7 +213,7 @@ class EmbeddingCache: self.conn.commit() - def clear(self, model: Optional[str] = None) -> int: + def clear(self, model: str | None = None) -> int: """ Clear cache entries. diff --git a/src/skill_seekers/embedding/generator.py b/src/skill_seekers/embedding/generator.py index b6d7b64..b855139 100644 --- a/src/skill_seekers/embedding/generator.py +++ b/src/skill_seekers/embedding/generator.py @@ -4,7 +4,6 @@ Embedding generation with multiple model support. import os import hashlib -from typing import List, Optional, Tuple import numpy as np # OpenAI support @@ -128,9 +127,9 @@ class EmbeddingGenerator: def __init__( self, - api_key: Optional[str] = None, - voyage_api_key: Optional[str] = None, - cache_dir: Optional[str] = None + api_key: str | None = None, + voyage_api_key: str | None = None, + cache_dir: str | None = None ): """ Initialize embedding generator. @@ -168,7 +167,7 @@ class EmbeddingGenerator: ) return self.MODELS[model] - def list_models(self) -> List[dict]: + def list_models(self) -> list[dict]: """List all available models.""" models = [] for name, info in self.MODELS.items(): @@ -186,7 +185,7 @@ class EmbeddingGenerator: text: str, model: str = "text-embedding-3-small", normalize: bool = True - ) -> List[float]: + ) -> list[float]: """ Generate embedding for a single text. @@ -216,11 +215,11 @@ class EmbeddingGenerator: def generate_batch( self, - texts: List[str], + texts: list[str], model: str = "text-embedding-3-small", normalize: bool = True, batch_size: int = 32 - ) -> Tuple[List[List[float]], int]: + ) -> tuple[list[list[float]], int]: """ Generate embeddings for multiple texts. @@ -251,7 +250,7 @@ class EmbeddingGenerator: def _generate_openai( self, text: str, model: str, normalize: bool - ) -> List[float]: + ) -> list[float]: """Generate embedding using OpenAI API.""" if not OPENAI_AVAILABLE: raise ImportError( @@ -277,8 +276,8 @@ class EmbeddingGenerator: raise Exception(f"OpenAI embedding generation failed: {e}") def _generate_openai_batch( - self, texts: List[str], model: str, normalize: bool, batch_size: int - ) -> Tuple[List[List[float]], int]: + self, texts: list[str], model: str, normalize: bool, batch_size: int + ) -> tuple[list[list[float]], int]: """Generate embeddings using OpenAI API in batches.""" if not OPENAI_AVAILABLE: raise ImportError( @@ -316,7 +315,7 @@ class EmbeddingGenerator: def _generate_voyage( self, text: str, model: str, normalize: bool - ) -> List[float]: + ) -> list[float]: """Generate embedding using Voyage AI API.""" if not VOYAGE_AVAILABLE: raise ImportError( @@ -342,8 +341,8 @@ class EmbeddingGenerator: raise Exception(f"Voyage AI embedding generation failed: {e}") def _generate_voyage_batch( - self, texts: List[str], model: str, normalize: bool, batch_size: int - ) -> Tuple[List[List[float]], int]: + self, texts: list[str], model: str, normalize: bool, batch_size: int + ) -> tuple[list[list[float]], int]: """Generate embeddings using Voyage AI API in batches.""" if not VOYAGE_AVAILABLE: raise ImportError( @@ -381,7 +380,7 @@ class EmbeddingGenerator: def _generate_sentence_transformer( self, text: str, model: str, normalize: bool - ) -> List[float]: + ) -> list[float]: """Generate embedding using sentence-transformers.""" if not SENTENCE_TRANSFORMERS_AVAILABLE: raise ImportError( @@ -401,8 +400,8 @@ class EmbeddingGenerator: return embedding.tolist() def _generate_sentence_transformer_batch( - self, texts: List[str], model: str, normalize: bool, batch_size: int - ) -> Tuple[List[List[float]], int]: + self, texts: list[str], model: str, normalize: bool, batch_size: int + ) -> tuple[list[list[float]], int]: """Generate embeddings using sentence-transformers in batches.""" if not SENTENCE_TRANSFORMERS_AVAILABLE: raise ImportError( @@ -428,7 +427,7 @@ class EmbeddingGenerator: return embeddings.tolist(), dimensions @staticmethod - def _normalize(embedding: List[float]) -> List[float]: + def _normalize(embedding: list[float]) -> list[float]: """Normalize embedding to unit length.""" vec = np.array(embedding) norm = np.linalg.norm(vec) diff --git a/src/skill_seekers/embedding/models.py b/src/skill_seekers/embedding/models.py index 14f0b11..bd57f3b 100644 --- a/src/skill_seekers/embedding/models.py +++ b/src/skill_seekers/embedding/models.py @@ -2,7 +2,7 @@ Pydantic models for embedding API. """ -from typing import List, Optional, Dict, Any +from typing import Any from pydantic import BaseModel, Field @@ -32,7 +32,7 @@ class EmbeddingRequest(BaseModel): class BatchEmbeddingRequest(BaseModel): """Request model for batch embedding generation.""" - texts: List[str] = Field(..., description="List of texts to embed") + texts: list[str] = Field(..., description="List of texts to embed") model: str = Field( default="text-embedding-3-small", description="Embedding model to use" @@ -41,7 +41,7 @@ class BatchEmbeddingRequest(BaseModel): default=True, description="Normalize embeddings to unit length" ) - batch_size: Optional[int] = Field( + batch_size: int | None = Field( default=32, description="Batch size for processing (default: 32)" ) @@ -64,7 +64,7 @@ class BatchEmbeddingRequest(BaseModel): class EmbeddingResponse(BaseModel): """Response model for embedding generation.""" - embedding: List[float] = Field(..., description="Generated embedding vector") + embedding: list[float] = Field(..., description="Generated embedding vector") model: str = Field(..., description="Model used for generation") dimensions: int = Field(..., description="Embedding dimensions") cached: bool = Field( @@ -76,7 +76,7 @@ class EmbeddingResponse(BaseModel): class BatchEmbeddingResponse(BaseModel): """Response model for batch embedding generation.""" - embeddings: List[List[float]] = Field(..., description="List of embedding vectors") + embeddings: list[list[float]] = Field(..., description="List of embedding vectors") model: str = Field(..., description="Model used for generation") dimensions: int = Field(..., description="Embedding dimensions") count: int = Field(..., description="Number of embeddings generated") @@ -121,7 +121,7 @@ class SkillEmbeddingResponse(BaseModel): total_chunks: int = Field(..., description="Total number of chunks embedded") model: str = Field(..., description="Model used for generation") dimensions: int = Field(..., description="Embedding dimensions") - metadata: Dict[str, Any] = Field( + metadata: dict[str, Any] = Field( default_factory=dict, description="Skill metadata" ) @@ -132,9 +132,9 @@ class HealthResponse(BaseModel): status: str = Field(..., description="Service status") version: str = Field(..., description="API version") - models: List[str] = Field(..., description="Available embedding models") + models: list[str] = Field(..., description="Available embedding models") cache_enabled: bool = Field(..., description="Whether cache is enabled") - cache_size: Optional[int] = Field(None, description="Number of cached embeddings") + cache_size: int | None = Field(None, description="Number of cached embeddings") class ModelInfo(BaseModel): @@ -144,7 +144,7 @@ class ModelInfo(BaseModel): provider: str = Field(..., description="Model provider (openai, anthropic, sentence-transformers)") dimensions: int = Field(..., description="Embedding dimensions") max_tokens: int = Field(..., description="Maximum input tokens") - cost_per_million: Optional[float] = Field( + cost_per_million: float | None = Field( None, description="Cost per million tokens (if applicable)" ) @@ -153,5 +153,5 @@ class ModelInfo(BaseModel): class ModelsResponse(BaseModel): """Response model for listing available models.""" - models: List[ModelInfo] = Field(..., description="List of available models") + models: list[ModelInfo] = Field(..., description="List of available models") count: int = Field(..., description="Number of available models") diff --git a/src/skill_seekers/embedding/server.py b/src/skill_seekers/embedding/server.py index 3311a4e..6f66d1a 100644 --- a/src/skill_seekers/embedding/server.py +++ b/src/skill_seekers/embedding/server.py @@ -20,7 +20,6 @@ Usage: import os import sys from pathlib import Path -from typing import List, Optional try: from fastapi import FastAPI, HTTPException, Query @@ -208,7 +207,7 @@ if FASTAPI_AVAILABLE: ) # Fill in placeholders and cache - for idx, text, embedding in zip(text_indices, texts_to_generate, generated_embeddings): + for idx, text, embedding in zip(text_indices, texts_to_generate, generated_embeddings, strict=False): embeddings[idx] = embedding if cache: @@ -300,7 +299,7 @@ if FASTAPI_AVAILABLE: @app.post("/cache/clear", response_model=dict) async def clear_cache( - model: Optional[str] = Query(None, description="Model to clear (all if not specified)") + model: str | None = Query(None, description="Model to clear (all if not specified)") ): """Clear cache entries.""" if not cache: diff --git a/src/skill_seekers/mcp/tools/vector_db_tools.py b/src/skill_seekers/mcp/tools/vector_db_tools.py index ec8ddd3..a0e5a28 100644 --- a/src/skill_seekers/mcp/tools/vector_db_tools.py +++ b/src/skill_seekers/mcp/tools/vector_db_tools.py @@ -12,7 +12,6 @@ Each tool provides a direct interface to its respective vector database adaptor. import sys from pathlib import Path -from typing import List try: from mcp.types import TextContent @@ -36,7 +35,7 @@ except ImportError: get_adaptor = None # Will handle gracefully below -async def export_to_weaviate_impl(args: dict) -> List[TextContent]: +async def export_to_weaviate_impl(args: dict) -> list[TextContent]: """ Export skill to Weaviate vector database format. @@ -140,7 +139,7 @@ async def export_to_weaviate_impl(args: dict) -> List[TextContent]: ] -async def export_to_chroma_impl(args: dict) -> List[TextContent]: +async def export_to_chroma_impl(args: dict) -> list[TextContent]: """ Export skill to Chroma vector database format. @@ -244,7 +243,7 @@ async def export_to_chroma_impl(args: dict) -> List[TextContent]: ] -async def export_to_faiss_impl(args: dict) -> List[TextContent]: +async def export_to_faiss_impl(args: dict) -> list[TextContent]: """ Export skill to FAISS vector index format. @@ -363,7 +362,7 @@ async def export_to_faiss_impl(args: dict) -> List[TextContent]: ] -async def export_to_qdrant_impl(args: dict) -> List[TextContent]: +async def export_to_qdrant_impl(args: dict) -> list[TextContent]: """ Export skill to Qdrant vector database format. diff --git a/src/skill_seekers/sync/detector.py b/src/skill_seekers/sync/detector.py index dd29140..381850c 100644 --- a/src/skill_seekers/sync/detector.py +++ b/src/skill_seekers/sync/detector.py @@ -4,10 +4,8 @@ Change detection for documentation pages. import hashlib import difflib -from typing import Dict, List, Optional, Tuple from datetime import datetime import requests -from pathlib import Path from .models import PageChange, ChangeType, ChangeReport @@ -59,7 +57,7 @@ class ChangeDetector: """ return hashlib.sha256(content.encode('utf-8')).hexdigest() - def fetch_page(self, url: str) -> Tuple[str, Dict[str, str]]: + def fetch_page(self, url: str) -> tuple[str, dict[str, str]]: """ Fetch page content and metadata. @@ -92,9 +90,9 @@ class ChangeDetector: def check_page( self, url: str, - old_hash: Optional[str] = None, + old_hash: str | None = None, generate_diff: bool = False, - old_content: Optional[str] = None + old_content: str | None = None ) -> PageChange: """ Check if page has changed. @@ -137,7 +135,7 @@ class ChangeDetector: detected_at=datetime.utcnow() ) - except requests.RequestException as e: + except requests.RequestException: # Page might be deleted or temporarily unavailable return PageChange( url=url, @@ -149,8 +147,8 @@ class ChangeDetector: def check_pages( self, - urls: List[str], - previous_hashes: Dict[str, str], + urls: list[str], + previous_hashes: dict[str, str], generate_diffs: bool = False ) -> ChangeReport: """ @@ -254,8 +252,8 @@ class ChangeDetector: def check_header_changes( self, url: str, - old_modified: Optional[str] = None, - old_etag: Optional[str] = None + old_modified: str | None = None, + old_etag: str | None = None ) -> bool: """ Quick check using HTTP headers (no content download). @@ -284,10 +282,7 @@ class ChangeDetector: if old_modified and new_modified and old_modified != new_modified: return True - if old_etag and new_etag and old_etag != new_etag: - return True - - return False + return bool(old_etag and new_etag and old_etag != new_etag) except requests.RequestException: # If HEAD request fails, assume change (will be verified with GET) @@ -295,9 +290,9 @@ class ChangeDetector: def batch_check_headers( self, - urls: List[str], - previous_metadata: Dict[str, Dict[str, str]] - ) -> List[str]: + urls: list[str], + previous_metadata: dict[str, dict[str, str]] + ) -> list[str]: """ Batch check URLs using headers only. diff --git a/src/skill_seekers/sync/models.py b/src/skill_seekers/sync/models.py index def13b4..bacf6b1 100644 --- a/src/skill_seekers/sync/models.py +++ b/src/skill_seekers/sync/models.py @@ -2,7 +2,7 @@ Pydantic models for sync system. """ -from typing import List, Optional, Dict, Any +from typing import Any from datetime import datetime from enum import Enum from pydantic import BaseModel, Field @@ -21,9 +21,9 @@ class PageChange(BaseModel): url: str = Field(..., description="Page URL") change_type: ChangeType = Field(..., description="Type of change") - old_hash: Optional[str] = Field(None, description="Previous content hash") - new_hash: Optional[str] = Field(None, description="New content hash") - diff: Optional[str] = Field(None, description="Content diff (if available)") + old_hash: str | None = Field(None, description="Previous content hash") + new_hash: str | None = Field(None, description="New content hash") + diff: str | None = Field(None, description="Content diff (if available)") detected_at: datetime = Field( default_factory=datetime.utcnow, description="When change was detected" @@ -47,9 +47,9 @@ class ChangeReport(BaseModel): skill_name: str = Field(..., description="Skill name") total_pages: int = Field(..., description="Total pages checked") - added: List[PageChange] = Field(default_factory=list, description="Added pages") - modified: List[PageChange] = Field(default_factory=list, description="Modified pages") - deleted: List[PageChange] = Field(default_factory=list, description="Deleted pages") + added: list[PageChange] = Field(default_factory=list, description="Added pages") + modified: list[PageChange] = Field(default_factory=list, description="Modified pages") + deleted: list[PageChange] = Field(default_factory=list, description="Deleted pages") unchanged: int = Field(0, description="Number of unchanged pages") checked_at: datetime = Field( default_factory=datetime.utcnow, @@ -84,19 +84,19 @@ class SyncConfig(BaseModel): default=True, description="Send notifications on changes" ) - notification_channels: List[str] = Field( + notification_channels: list[str] = Field( default_factory=list, description="Notification channels (email, slack, webhook)" ) - webhook_url: Optional[str] = Field( + webhook_url: str | None = Field( None, description="Webhook URL for change notifications" ) - email_recipients: List[str] = Field( + email_recipients: list[str] = Field( default_factory=list, description="Email recipients for notifications" ) - slack_webhook: Optional[str] = Field( + slack_webhook: str | None = Field( None, description="Slack webhook URL" ) @@ -120,16 +120,16 @@ class SyncState(BaseModel): """Current state of sync monitoring.""" skill_name: str = Field(..., description="Skill name") - last_check: Optional[datetime] = Field(None, description="Last check time") - last_change: Optional[datetime] = Field(None, description="Last change detected") + last_check: datetime | None = Field(None, description="Last check time") + last_change: datetime | None = Field(None, description="Last change detected") total_checks: int = Field(default=0, description="Total checks performed") total_changes: int = Field(default=0, description="Total changes detected") - page_hashes: Dict[str, str] = Field( + page_hashes: dict[str, str] = Field( default_factory=dict, description="URL -> content hash mapping" ) status: str = Field(default="idle", description="Current status") - error: Optional[str] = Field(None, description="Last error message") + error: str | None = Field(None, description="Last error message") class WebhookPayload(BaseModel): @@ -141,8 +141,8 @@ class WebhookPayload(BaseModel): default_factory=datetime.utcnow, description="Event timestamp" ) - changes: Optional[ChangeReport] = Field(None, description="Change report") - metadata: Dict[str, Any] = Field( + changes: ChangeReport | None = Field(None, description="Change report") + metadata: dict[str, Any] = Field( default_factory=dict, description="Additional metadata" ) diff --git a/src/skill_seekers/sync/monitor.py b/src/skill_seekers/sync/monitor.py index c9b193c..26aea11 100644 --- a/src/skill_seekers/sync/monitor.py +++ b/src/skill_seekers/sync/monitor.py @@ -6,12 +6,12 @@ import json import time import threading from pathlib import Path -from typing import Optional, Dict, List, Callable +from collections.abc import Callable from datetime import datetime import schedule from .detector import ChangeDetector -from .models import SyncConfig, SyncState, ChangeReport, WebhookPayload +from .models import SyncState, ChangeReport, WebhookPayload from .notifier import Notifier @@ -50,8 +50,8 @@ class SyncMonitor: config_path: str, check_interval: int = 3600, auto_update: bool = False, - state_file: Optional[str] = None, - on_change: Optional[Callable[[ChangeReport], None]] = None + state_file: str | None = None, + on_change: Callable[[ChangeReport], None] | None = None ): """ Initialize sync monitor. @@ -244,7 +244,7 @@ class SyncMonitor: print(f"šŸ›‘ Stopped monitoring {self.skill_name}") - def stats(self) -> Dict: + def stats(self) -> dict: """Get monitoring statistics.""" return { "skill_name": self.skill_name, diff --git a/src/skill_seekers/sync/notifier.py b/src/skill_seekers/sync/notifier.py index 546ad08..c581a8c 100644 --- a/src/skill_seekers/sync/notifier.py +++ b/src/skill_seekers/sync/notifier.py @@ -4,7 +4,6 @@ Notification system for sync events. import os import requests -from typing import Optional, List from .models import WebhookPayload @@ -32,9 +31,9 @@ class Notifier: def __init__( self, - webhook_url: Optional[str] = None, - slack_webhook: Optional[str] = None, - email_recipients: Optional[List[str]] = None, + webhook_url: str | None = None, + slack_webhook: str | None = None, + email_recipients: list[str] | None = None, console: bool = True ): """ diff --git a/tests/test_adaptor_benchmarks.py b/tests/test_adaptor_benchmarks.py index 987868e..bd3f362 100644 --- a/tests/test_adaptor_benchmarks.py +++ b/tests/test_adaptor_benchmarks.py @@ -207,7 +207,7 @@ class TestAdaptorBenchmarks(unittest.TestCase): time_per_ref = elapsed / ref_count # Get output size - data = json.loads(formatted) + json.loads(formatted) size_kb = len(formatted) / 1024 results.append({ @@ -350,14 +350,14 @@ class TestAdaptorBenchmarks(unittest.TestCase): empty_dir.mkdir() start = time.perf_counter() - empty_result = adaptor.format_skill_md(empty_dir, metadata) + adaptor.format_skill_md(empty_dir, metadata) empty_time = time.perf_counter() - start # Full skill (50 references) full_dir = self._create_skill_with_n_references(50) start = time.perf_counter() - full_result = adaptor.format_skill_md(full_dir, metadata) + adaptor.format_skill_md(full_dir, metadata) full_time = time.perf_counter() - start print(f"\nEmpty skill: {empty_time*1000:.2f}ms") diff --git a/tests/test_adaptors/test_adaptors_e2e.py b/tests/test_adaptors/test_adaptors_e2e.py index 19fecf6..619a604 100644 --- a/tests/test_adaptors/test_adaptors_e2e.py +++ b/tests/test_adaptors/test_adaptors_e2e.py @@ -850,7 +850,6 @@ export default { # Should have categories from reference files # Files: getting_started.md, reactivity_api.md, components_guide.md # Categories derived from filenames (stem.replace("_", " ").lower()) - expected_refs = {"getting started", "reactivity api", "components guide"} # Check that at least one reference category exists ref_categories = categories - {"overview"} diff --git a/tests/test_adaptors/test_chroma_adaptor.py b/tests/test_adaptors/test_chroma_adaptor.py index 0c56e6a..e36e61e 100644 --- a/tests/test_adaptors/test_chroma_adaptor.py +++ b/tests/test_adaptors/test_chroma_adaptor.py @@ -4,8 +4,6 @@ Tests for Chroma Adaptor """ import json -import tempfile -from pathlib import Path import pytest diff --git a/tests/test_adaptors/test_faiss_adaptor.py b/tests/test_adaptors/test_faiss_adaptor.py index ec62ee4..d0993a3 100644 --- a/tests/test_adaptors/test_faiss_adaptor.py +++ b/tests/test_adaptors/test_faiss_adaptor.py @@ -4,8 +4,6 @@ Tests for FAISS Adaptor """ import json -import tempfile -from pathlib import Path import pytest diff --git a/tests/test_adaptors/test_haystack_adaptor.py b/tests/test_adaptors/test_haystack_adaptor.py index 207e36d..e0278d6 100644 --- a/tests/test_adaptors/test_haystack_adaptor.py +++ b/tests/test_adaptors/test_haystack_adaptor.py @@ -4,8 +4,6 @@ Tests for Haystack Adaptor """ import json -import tempfile -from pathlib import Path import pytest diff --git a/tests/test_adaptors/test_langchain_adaptor.py b/tests/test_adaptors/test_langchain_adaptor.py index 7beec82..9919bc3 100644 --- a/tests/test_adaptors/test_langchain_adaptor.py +++ b/tests/test_adaptors/test_langchain_adaptor.py @@ -4,8 +4,6 @@ Tests for LangChain Adaptor """ import json -import tempfile -from pathlib import Path import pytest diff --git a/tests/test_adaptors/test_llama_index_adaptor.py b/tests/test_adaptors/test_llama_index_adaptor.py index 7e5fe28..ab01f21 100644 --- a/tests/test_adaptors/test_llama_index_adaptor.py +++ b/tests/test_adaptors/test_llama_index_adaptor.py @@ -4,8 +4,6 @@ Tests for LlamaIndex Adaptor """ import json -import tempfile -from pathlib import Path import pytest diff --git a/tests/test_adaptors/test_qdrant_adaptor.py b/tests/test_adaptors/test_qdrant_adaptor.py index 9831174..019f926 100644 --- a/tests/test_adaptors/test_qdrant_adaptor.py +++ b/tests/test_adaptors/test_qdrant_adaptor.py @@ -4,8 +4,6 @@ Tests for Qdrant Adaptor """ import json -import tempfile -from pathlib import Path import pytest diff --git a/tests/test_adaptors/test_weaviate_adaptor.py b/tests/test_adaptors/test_weaviate_adaptor.py index 419ac96..6a50e11 100644 --- a/tests/test_adaptors/test_weaviate_adaptor.py +++ b/tests/test_adaptors/test_weaviate_adaptor.py @@ -4,8 +4,6 @@ Tests for Weaviate Adaptor """ import json -import tempfile -from pathlib import Path import pytest diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 739d8cc..b0f5a8c 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -4,10 +4,8 @@ Tests for benchmarking suite. import time import json -from pathlib import Path from datetime import datetime -import pytest from skill_seekers.benchmark import ( Benchmark, @@ -164,7 +162,7 @@ class TestBenchmark: with benchmark.memory("operation"): # Allocate some memory - data = [0] * 1000000 + pass assert len(benchmark.result.memory) == 1 assert benchmark.result.memory[0].operation == "operation" @@ -394,7 +392,7 @@ class TestBenchmarkRunner: with bench.timer("operation"): time.sleep(0.1) - baseline_report = runner.run("baseline", baseline_bench, save=True) + runner.run("baseline", baseline_bench, save=True) baseline_path = list(tmp_path.glob("baseline_*.json"))[0] # Create faster version @@ -402,7 +400,7 @@ class TestBenchmarkRunner: with bench.timer("operation"): time.sleep(0.05) - improved_report = runner.run("improved", improved_bench, save=True) + runner.run("improved", improved_bench, save=True) improved_path = list(tmp_path.glob("improved_*.json"))[0] # Compare diff --git a/tests/test_chunking_integration.py b/tests/test_chunking_integration.py index 42ef2dd..7bdd029 100644 --- a/tests/test_chunking_integration.py +++ b/tests/test_chunking_integration.py @@ -12,7 +12,6 @@ import pytest import json from pathlib import Path from skill_seekers.cli.adaptors import get_adaptor -from skill_seekers.cli.adaptors.base import SkillMetadata def create_test_skill(tmp_path: Path, large_doc: bool = False) -> Path: @@ -293,7 +292,7 @@ class TestBaseAdaptorChunkingHelper: for chunk_text, chunk_meta in chunks: assert isinstance(chunk_text, str) assert isinstance(chunk_meta, dict) - assert chunk_meta['is_chunked'] == True + assert chunk_meta['is_chunked'] assert 'chunk_index' in chunk_meta assert 'chunk_id' in chunk_meta # Original metadata preserved diff --git a/tests/test_cloud_storage.py b/tests/test_cloud_storage.py index d11fa56..7c9951c 100644 --- a/tests/test_cloud_storage.py +++ b/tests/test_cloud_storage.py @@ -6,7 +6,7 @@ import os import pytest import tempfile from pathlib import Path -from unittest.mock import Mock, patch, MagicMock +from unittest.mock import Mock, patch from skill_seekers.cli.storage import ( get_storage_adaptor, diff --git a/tests/test_embedding.py b/tests/test_embedding.py index 956ea9e..b54e664 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -5,7 +5,7 @@ Tests for embedding generation system. import pytest import tempfile from pathlib import Path -from unittest.mock import Mock, patch +from unittest.mock import patch from skill_seekers.embedding.models import ( EmbeddingRequest, diff --git a/tests/test_embedding_pipeline.py b/tests/test_embedding_pipeline.py index 780f429..f7d316f 100644 --- a/tests/test_embedding_pipeline.py +++ b/tests/test_embedding_pipeline.py @@ -14,7 +14,6 @@ import pytest from pathlib import Path import sys import tempfile -import json # Add src to path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) diff --git a/tests/test_incremental_updates.py b/tests/test_incremental_updates.py index ab00a65..dc945e8 100644 --- a/tests/test_incremental_updates.py +++ b/tests/test_incremental_updates.py @@ -21,9 +21,7 @@ import time sys.path.insert(0, str(Path(__file__).parent.parent / "src")) from skill_seekers.cli.incremental_updater import ( - IncrementalUpdater, - DocumentVersion, - ChangeSet + IncrementalUpdater ) @@ -67,7 +65,7 @@ def test_no_changes_after_save(temp_skill_dir): updater = IncrementalUpdater(temp_skill_dir) # First scan - change_set1 = updater.detect_changes() + updater.detect_changes() updater.save_current_versions() # Second scan (no changes) diff --git a/tests/test_integration_adaptors.py b/tests/test_integration_adaptors.py index 0011ce4..f9b6273 100644 --- a/tests/test_integration_adaptors.py +++ b/tests/test_integration_adaptors.py @@ -17,12 +17,12 @@ Usage: import json import time -from pathlib import Path import pytest from skill_seekers.cli.adaptors import get_adaptor from skill_seekers.cli.adaptors.base import SkillMetadata +import contextlib @pytest.fixture @@ -144,7 +144,7 @@ class TestWeaviateIntegration: # Package skill adaptor = get_adaptor("weaviate") - metadata = SkillMetadata( + SkillMetadata( name="integration_test", description="Integration test skill for Weaviate" ) @@ -231,7 +231,7 @@ class TestWeaviateIntegration: # Package with rich metadata adaptor = get_adaptor("weaviate") - metadata = SkillMetadata( + SkillMetadata( name="metadata_test", description="Test metadata preservation", version="2.0.0", @@ -271,10 +271,8 @@ class TestWeaviateIntegration: assert "test" in obj["tags"], "Tags not preserved" finally: - try: + with contextlib.suppress(Exception): client.schema.delete_class(class_name) - except Exception: - pass @pytest.mark.integration @@ -302,7 +300,7 @@ class TestChromaIntegration: # Package skill adaptor = get_adaptor("chroma") - metadata = SkillMetadata( + SkillMetadata( name="chroma_integration_test", description="Integration test skill for ChromaDB" ) @@ -415,10 +413,8 @@ class TestChromaIntegration: "Filter returned wrong category" finally: - try: + with contextlib.suppress(Exception): client.delete_collection(name=collection_name) - except Exception: - pass @pytest.mark.integration @@ -447,7 +443,7 @@ class TestQdrantIntegration: # Package skill adaptor = get_adaptor("qdrant") - metadata = SkillMetadata( + SkillMetadata( name="qdrant_integration_test", description="Integration test skill for Qdrant" ) @@ -554,7 +550,7 @@ class TestQdrantIntegration: # Package and upload adaptor = get_adaptor("qdrant") - metadata = SkillMetadata( + SkillMetadata( name="qdrant_filter_test", description="Test filtering capabilities" ) @@ -610,10 +606,8 @@ class TestQdrantIntegration: "Filter returned wrong type" finally: - try: + with contextlib.suppress(Exception): client.delete_collection(collection_name) - except Exception: - pass if __name__ == "__main__": diff --git a/tests/test_issue_277_real_world.py b/tests/test_issue_277_real_world.py index 38051a1..263764a 100644 --- a/tests/test_issue_277_real_world.py +++ b/tests/test_issue_277_real_world.py @@ -61,15 +61,6 @@ class TestIssue277RealWorld(unittest.TestCase): ) # Verify correct transformed URLs - expected_urls = { - "https://mikro-orm.io/docs/index.html.md", # Root URL - "https://mikro-orm.io/docs/reference.md", # Already .md - "https://mikro-orm.io/docs/quick-start/index.html.md", # Deduplicated from anchor - "https://mikro-orm.io/docs/repositories.md", # Already .md, anchor stripped - "https://mikro-orm.io/docs/propagation/index.html.md", - "https://mikro-orm.io/docs/defining-entities.md", # Already .md, deduplicated - "https://mikro-orm.io/docs/defining-entities/index.html.md", # Non-.md version - } # Check that we got the expected number of unique URLs # Note: defining-entities has both .md and non-.md versions, so we have 2 entries for it diff --git a/tests/test_multilang_support.py b/tests/test_multilang_support.py index 0c390e6..7c22271 100644 --- a/tests/test_multilang_support.py +++ b/tests/test_multilang_support.py @@ -21,8 +21,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent / "src")) from skill_seekers.cli.multilang_support import ( LanguageDetector, - MultiLanguageManager, - LanguageInfo + MultiLanguageManager ) diff --git a/tests/test_preset_system.py b/tests/test_preset_system.py index df308df..6626ed3 100644 --- a/tests/test_preset_system.py +++ b/tests/test_preset_system.py @@ -40,12 +40,12 @@ class TestPresetDefinitions: assert quick.estimated_time == '1-2 minutes' assert quick.icon == '⚔' # Quick should disable slow features - assert quick.features['api_reference'] == True # Essential - assert quick.features['dependency_graph'] == False # Slow - assert quick.features['patterns'] == False # Slow - assert quick.features['test_examples'] == False # Slow - assert quick.features['how_to_guides'] == False # Requires AI - assert quick.features['docs'] == True # Essential + assert quick.features['api_reference'] # Essential + assert not quick.features['dependency_graph'] # Slow + assert not quick.features['patterns'] # Slow + assert not quick.features['test_examples'] # Slow + assert not quick.features['how_to_guides'] # Requires AI + assert quick.features['docs'] # Essential def test_standard_preset(self): """Test standard preset configuration.""" @@ -56,13 +56,13 @@ class TestPresetDefinitions: assert standard.estimated_time == '5-10 minutes' assert standard.icon == 'šŸŽÆ' # Standard should enable core features - assert standard.features['api_reference'] == True - assert standard.features['dependency_graph'] == True - assert standard.features['patterns'] == True - assert standard.features['test_examples'] == True - assert standard.features['how_to_guides'] == False # Slow - assert standard.features['config_patterns'] == True - assert standard.features['docs'] == True + assert standard.features['api_reference'] + assert standard.features['dependency_graph'] + assert standard.features['patterns'] + assert standard.features['test_examples'] + assert not standard.features['how_to_guides'] # Slow + assert standard.features['config_patterns'] + assert standard.features['docs'] def test_comprehensive_preset(self): """Test comprehensive preset configuration.""" @@ -131,12 +131,12 @@ class TestPresetApplication: assert updated['depth'] == 'surface' assert updated['enhance_level'] == 0 - assert updated['skip_patterns'] == True # Quick disables patterns - assert updated['skip_dependency_graph'] == True # Quick disables dep graph - assert updated['skip_test_examples'] == True # Quick disables tests - assert updated['skip_how_to_guides'] == True # Quick disables guides - assert updated['skip_api_reference'] == False # Quick enables API ref - assert updated['skip_docs'] == False # Quick enables docs + assert updated['skip_patterns'] # Quick disables patterns + assert updated['skip_dependency_graph'] # Quick disables dep graph + assert updated['skip_test_examples'] # Quick disables tests + assert updated['skip_how_to_guides'] # Quick disables guides + assert not updated['skip_api_reference'] # Quick enables API ref + assert not updated['skip_docs'] # Quick enables docs def test_apply_preset_standard(self): """Test applying standard preset.""" @@ -145,12 +145,12 @@ class TestPresetApplication: assert updated['depth'] == 'deep' assert updated['enhance_level'] == 1 - assert updated['skip_patterns'] == False # Standard enables patterns - assert updated['skip_dependency_graph'] == False # Standard enables dep graph - assert updated['skip_test_examples'] == False # Standard enables tests - assert updated['skip_how_to_guides'] == True # Standard disables guides (slow) - assert updated['skip_api_reference'] == False # Standard enables API ref - assert updated['skip_docs'] == False # Standard enables docs + assert not updated['skip_patterns'] # Standard enables patterns + assert not updated['skip_dependency_graph'] # Standard enables dep graph + assert not updated['skip_test_examples'] # Standard enables tests + assert updated['skip_how_to_guides'] # Standard disables guides (slow) + assert not updated['skip_api_reference'] # Standard enables API ref + assert not updated['skip_docs'] # Standard enables docs def test_apply_preset_comprehensive(self): """Test applying comprehensive preset.""" @@ -160,13 +160,13 @@ class TestPresetApplication: assert updated['depth'] == 'full' assert updated['enhance_level'] == 3 # Comprehensive enables ALL features - assert updated['skip_patterns'] == False - assert updated['skip_dependency_graph'] == False - assert updated['skip_test_examples'] == False - assert updated['skip_how_to_guides'] == False - assert updated['skip_api_reference'] == False - assert updated['skip_config_patterns'] == False - assert updated['skip_docs'] == False + assert not updated['skip_patterns'] + assert not updated['skip_dependency_graph'] + assert not updated['skip_test_examples'] + assert not updated['skip_how_to_guides'] + assert not updated['skip_api_reference'] + assert not updated['skip_config_patterns'] + assert not updated['skip_docs'] def test_cli_overrides_preset(self): """Test that CLI args override preset defaults.""" @@ -182,7 +182,7 @@ class TestPresetApplication: assert updated['enhance_level'] == 2 # CLI wins # Preset says skip_patterns=True (disabled), but CLI said False (enabled) - assert updated['skip_patterns'] == False # CLI wins + assert not updated['skip_patterns'] # CLI wins def test_apply_preset_preserves_args(self): """Test that apply_preset preserves existing args.""" diff --git a/tests/test_rag_chunker.py b/tests/test_rag_chunker.py index a2c5c80..7ce7282 100644 --- a/tests/test_rag_chunker.py +++ b/tests/test_rag_chunker.py @@ -3,9 +3,7 @@ Tests for RAG Chunker (semantic chunking for RAG pipelines). """ import pytest -from pathlib import Path import json -import tempfile from skill_seekers.cli.rag_chunker import RAGChunker @@ -199,7 +197,7 @@ class TestRAGChunker: assert len(chunks) > 0 # Check metadata diversity - categories = set(chunk["metadata"]["category"] for chunk in chunks) + categories = {chunk["metadata"]["category"] for chunk in chunks} assert "overview" in categories # From SKILL.md assert "getting_started" in categories or "api" in categories # From references @@ -222,7 +220,7 @@ class TestRAGChunker: assert output_path.exists() # Check content - with open(output_path, 'r') as f: + with open(output_path) as f: loaded = json.load(f) assert len(loaded) == 1 diff --git a/tests/test_streaming_ingestion.py b/tests/test_streaming_ingestion.py index 4a1292d..7360b6b 100644 --- a/tests/test_streaming_ingestion.py +++ b/tests/test_streaming_ingestion.py @@ -14,15 +14,13 @@ import pytest from pathlib import Path import sys import tempfile -import json # Add src to path sys.path.insert(0, str(Path(__file__).parent.parent / "src")) from skill_seekers.cli.streaming_ingest import ( StreamingIngester, - IngestionProgress, - ChunkMetadata + IngestionProgress ) diff --git a/tests/test_upload_integration.py b/tests/test_upload_integration.py index 9469af0..6c1eaac 100644 --- a/tests/test_upload_integration.py +++ b/tests/test_upload_integration.py @@ -6,10 +6,7 @@ Tests real upload capabilities for vector databases. """ import json -import os import pytest -from pathlib import Path -from unittest.mock import Mock, patch # Import adaptors from skill_seekers.cli.adaptors import get_adaptor @@ -211,7 +208,6 @@ class TestUploadCommandIntegration: def test_upload_command_supports_chroma(self): """Test upload command recognizes chroma as target.""" - from skill_seekers.cli.upload_skill import upload_skill_api # This should not raise ValueError adaptor = get_adaptor('chroma') @@ -219,7 +215,6 @@ class TestUploadCommandIntegration: def test_upload_command_supports_weaviate(self): """Test upload command recognizes weaviate as target.""" - from skill_seekers.cli.upload_skill import upload_skill_api # This should not raise ValueError adaptor = get_adaptor('weaviate') diff --git a/tests/test_url_conversion.py b/tests/test_url_conversion.py index 5e40f67..3eb31bd 100644 --- a/tests/test_url_conversion.py +++ b/tests/test_url_conversion.py @@ -4,7 +4,6 @@ Covers bug fix for issue #277: URLs with anchor fragments causing 404 errors. """ import unittest -from unittest.mock import MagicMock from skill_seekers.cli.doc_scraper import DocToSkillConverter