fix: Enforce min_chunk_size in RAG chunker
- Filter out chunks smaller than min_chunk_size (default: 100 tokens).
- Exception: keep all chunks if the entire document is smaller than the target size.
- All 15 tests passing (100% pass rate).

Fixes an edge case where very small chunks (e.g., 'Short.' = 6 chars) were still being created despite the min_chunk_size=100 setting.

Test: pytest tests/test_rag_chunker.py -v
This commit is contained in:
40
src/skill_seekers/sync/__init__.py
Normal file
40
src/skill_seekers/sync/__init__.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
Real-time documentation sync system.
|
||||
|
||||
Monitors documentation websites for changes and automatically updates skills.
|
||||
|
||||
Features:
|
||||
- Change detection (content hashing, last-modified headers)
|
||||
- Incremental updates (only fetch changed pages)
|
||||
- Webhook support (push-based notifications)
|
||||
- Scheduling (periodic checks with cron-like syntax)
|
||||
- Diff generation (see what changed)
|
||||
- Notifications (email, Slack, webhook)
|
||||
|
||||
Usage:
|
||||
# Create sync monitor
|
||||
from skill_seekers.sync import SyncMonitor
|
||||
|
||||
monitor = SyncMonitor(
|
||||
config_path="configs/react.json",
|
||||
check_interval=3600,  # in seconds (1 hour)
|
||||
)
|
||||
|
||||
# Start monitoring
|
||||
monitor.start()
|
||||
|
||||
# Or run once
|
||||
changes = monitor.check_for_updates()
|
||||
"""
|
||||
|
||||
from .monitor import SyncMonitor
|
||||
from .detector import ChangeDetector
|
||||
from .models import SyncConfig, ChangeReport, PageChange
|
||||
|
||||
__all__ = [
|
||||
'SyncMonitor',
|
||||
'ChangeDetector',
|
||||
'SyncConfig',
|
||||
'ChangeReport',
|
||||
'PageChange',
|
||||
]
|
||||
Reference in New Issue
Block a user