Files
skill-seekers-reference/src/skill_seekers/cli/incremental_updater.py
yusyus a332507b1d fix: Fix 2 critical CLI issues blocking production (Kimi QA)
**Critical Issues Fixed:**

Issue #1: CLI Commands Were BROKEN ⚠️ CRITICAL
- Problem: 4 CLI commands existed but failed at runtime with ImportError
- Root Cause: Modules had example_usage() instead of main() functions
- Impact: Users couldn't use quality, stream, update, multilang features

**Fixed Files:**
- src/skill_seekers/cli/quality_metrics.py
  - Renamed example_usage() → main()
  - Added argparse with --report, --output flags
  - Proper exit codes and error handling

- src/skill_seekers/cli/streaming_ingest.py
  - Renamed example_usage() → main()
  - Added argparse with --chunk-size, --batch-size, --checkpoint flags
  - Supports both file and directory inputs

- src/skill_seekers/cli/incremental_updater.py
  - Renamed example_usage() → main()
  - Added argparse with --check-changes, --generate-package, --apply-update flags
  - Proper error handling and exit codes

- src/skill_seekers/cli/multilang_support.py
  - Renamed example_usage() → main()
  - Added argparse with --detect, --report, --export flags
  - Loads skill documents from directory

Issue #2: Haystack Missing from Package Choices ⚠️ CRITICAL
- Problem: Haystack adaptor worked but couldn't be used via CLI
- Root Cause: package_skill.py missing "haystack" in --target choices
- Impact: Users got "invalid choice" error when packaging for Haystack

**Fixed:**
- src/skill_seekers/cli/package_skill.py:188
  - Added "haystack" to --target choices list
  - Now matches main.py choices (all 11 platforms)

**Verification:**
 All 4 CLI commands now work:
   $ skill-seekers quality --help
   $ skill-seekers stream --help
   $ skill-seekers update --help
   $ skill-seekers multilang --help

 Haystack now available:
   $ skill-seekers package output/skill --target haystack

 All 164 adaptor tests still passing
 No regressions detected

**Credits:**
- Issues identified by: Kimi QA Review
- Fixes implemented by: Claude Sonnet 4.5

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-07 23:12:40 +03:00

479 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Incremental Updates for Skills
Provides smart change detection and partial updates to avoid full rebuilds.
Tracks document versions and generates delta packages.
"""
import json
import hashlib
from pathlib import Path
from typing import Optional, Dict, List, Set
from dataclasses import dataclass, asdict
from datetime import datetime
import difflib
@dataclass
class DocumentVersion:
    """Snapshot of a single tracked document as recorded by a scan."""

    # Path used as the tracking key (e.g. "SKILL.md" or "references/<name>").
    file_path: str
    # Hex digest of the file content.
    content_hash: str
    # File size in bytes.
    size_bytes: int
    # Modification time as a float timestamp.
    last_modified: float
    # Revision counter for this document.
    version: int
@dataclass
class ChangeSet:
    """Result of comparing two scans, grouped by kind of change."""

    # Documents present now but not in the previous snapshot.
    added: List[DocumentVersion]
    # Documents present in both snapshots whose content differs.
    modified: List[DocumentVersion]
    # Tracking keys of documents that were present before and are gone now.
    deleted: List[str]
    # Documents present in both snapshots with identical content.
    unchanged: List[DocumentVersion]

    @property
    def has_changes(self) -> bool:
        """Return True when at least one document was added, modified or deleted."""
        return bool(self.added or self.modified or self.deleted)

    @property
    def total_changes(self) -> int:
        """Return the number of added, modified and deleted documents combined."""
        return sum(len(group) for group in (self.added, self.modified, self.deleted))
@dataclass
class UpdateMetadata:
    """Descriptive header for one incremental update."""

    # When the update was produced (stored as a string).
    timestamp: str
    # Version label before the update.
    previous_version: str
    # Version label after the update.
    new_version: str
    # Mapping from a change category name to its count.
    change_summary: Dict[str, int]
    # Number of documents covered by the update.
    total_documents: int
class IncrementalUpdater:
    """
    Manages incremental updates for skill documentation.

    Tracks document versions, detects changes, and generates
    delta packages for efficient updates.

    Tracked documents are ``SKILL.md`` and the non-hidden ``*.md`` files
    directly inside ``references/``; each is keyed by its path relative to
    the skill directory.
    """

    def __init__(self, skill_dir: Path, version_file: str = ".skill_version.json"):
        """
        Initialize incremental updater.

        Args:
            skill_dir: Path to skill directory
            version_file: Name of version tracking file (stored inside skill_dir)
        """
        self.skill_dir = Path(skill_dir)
        self.version_file = self.skill_dir / version_file
        self.current_versions: Dict[str, DocumentVersion] = {}
        self.previous_versions: Dict[str, DocumentVersion] = {}

    def _compute_file_hash(self, file_path: Path) -> str:
        """
        Compute SHA256 hash of file content.

        Args:
            file_path: Path to file

        Returns:
            Hex digest of SHA256 hash, or an empty string if the file
            could not be read (a warning is printed in that case).
        """
        sha256 = hashlib.sha256()
        try:
            with open(file_path, "rb") as f:
                # Read in fixed-size chunks so large files are never fully in memory.
                while chunk := f.read(8192):
                    sha256.update(chunk)
        except OSError as e:
            print(f"⚠️ Warning: Failed to hash {file_path}: {e}")
            return ""
        return sha256.hexdigest()

    def _snapshot(self, path: Path, rel_path: str) -> DocumentVersion:
        """Build the version record for one file, keyed by its relative path."""
        # One stat() call so size and mtime describe the same file state.
        info = path.stat()
        return DocumentVersion(
            file_path=rel_path,
            content_hash=self._compute_file_hash(path),
            size_bytes=info.st_size,
            last_modified=info.st_mtime,
            version=1,
        )

    def _scan_documents(self) -> Dict[str, DocumentVersion]:
        """
        Scan skill directory and build version map.

        Returns:
            Dictionary mapping relative file paths to versions. Every
            record starts at version 1; detect_changes() adjusts it
            against the previous snapshot.
        """
        versions: Dict[str, DocumentVersion] = {}

        # Scan SKILL.md
        skill_md = self.skill_dir / "SKILL.md"
        if skill_md.is_file():
            versions["SKILL.md"] = self._snapshot(skill_md, "SKILL.md")

        # Scan references (sorted so the resulting map has a stable order)
        refs_dir = self.skill_dir / "references"
        if refs_dir.is_dir():
            for ref_file in sorted(refs_dir.glob("*.md")):
                if ref_file.is_file() and not ref_file.name.startswith("."):
                    rel_path = f"references/{ref_file.name}"
                    versions[rel_path] = self._snapshot(ref_file, rel_path)

        return versions

    def load_previous_versions(self) -> bool:
        """
        Load previous version information from disk.

        The in-memory ``previous_versions`` map is replaced only when the
        whole file loads successfully, so a corrupt file never leaves a
        half-populated map behind.

        Returns:
            True if versions loaded, False if no previous versions
            (missing or unreadable version file)
        """
        if not self.version_file.exists():
            return False

        try:
            data = json.loads(self.version_file.read_text(encoding="utf-8"))
            loaded = {
                file_path: DocumentVersion(**version_dict)
                for file_path, version_dict in data.get("documents", {}).items()
            }
        except (OSError, ValueError, TypeError, AttributeError) as e:
            # ValueError covers malformed JSON and decode errors; TypeError and
            # AttributeError cover JSON that does not have the expected shape.
            print(f"⚠️ Warning: Failed to load versions: {e}")
            return False

        self.previous_versions = loaded
        return True

    def save_current_versions(self) -> None:
        """Save current version information to disk (JSON, UTF-8)."""
        data = {
            "timestamp": datetime.now().isoformat(),
            "version": "1.0.0",
            "documents": {
                file_path: asdict(version)
                for file_path, version in self.current_versions.items()
            },
        }
        self.version_file.write_text(json.dumps(data, indent=2), encoding="utf-8")

    def detect_changes(self) -> ChangeSet:
        """
        Detect changes between previous and current versions.

        Rescans the skill directory, loads the saved snapshot and compares
        content hashes. Modified documents get ``previous.version + 1``;
        unchanged documents keep their previous version so counters do not
        reset when the snapshot is saved again.

        Returns:
            ChangeSet describing all changes, each list ordered by file path
        """
        # Scan current state
        self.current_versions = self._scan_documents()

        # Load previous state
        if not self.load_previous_versions():
            # First time - all files are "added"
            return ChangeSet(
                added=list(self.current_versions.values()),
                modified=[],
                deleted=[],
                unchanged=[],
            )

        added: List[DocumentVersion] = []
        modified: List[DocumentVersion] = []
        unchanged: List[DocumentVersion] = []

        for file_path in sorted(self.current_versions):
            current = self.current_versions[file_path]
            previous = self.previous_versions.get(file_path)
            if previous is None:
                added.append(current)
            elif current.content_hash != previous.content_hash:
                # Increment version
                current.version = previous.version + 1
                modified.append(current)
            else:
                # Carry the version forward; the fresh scan always starts at 1.
                current.version = previous.version
                unchanged.append(current)

        deleted = sorted(
            file_path
            for file_path in self.previous_versions
            if file_path not in self.current_versions
        )

        return ChangeSet(
            added=added,
            modified=modified,
            deleted=deleted,
            unchanged=unchanged,
        )

    def generate_update_package(
        self,
        change_set: ChangeSet,
        output_path: Path,
        include_content: bool = True,
    ) -> Path:
        """
        Generate incremental update package.

        Args:
            change_set: Changes to include
            output_path: Output path for package
            include_content: Include full document content. When False,
                added and modified documents are omitted from "changes"
                and only deletions are listed.

        Returns:
            Path to created package

        Raises:
            OSError: If a changed document cannot be read or the package
                cannot be written.
        """
        output_path = Path(output_path)

        # Build update package
        update_data = {
            "metadata": {
                "timestamp": datetime.now().isoformat(),
                "skill_name": self.skill_dir.name,
                "change_summary": {
                    "added": len(change_set.added),
                    "modified": len(change_set.modified),
                    "deleted": len(change_set.deleted),
                    "unchanged": len(change_set.unchanged),
                },
                "total_changes": change_set.total_changes,
            },
            "changes": {},
        }
        changes = update_data["changes"]

        # Include changed documents (added first, then modified)
        if include_content:
            for action, docs in (("add", change_set.added), ("modify", change_set.modified)):
                for doc in docs:
                    source = self.skill_dir / doc.file_path
                    changes[doc.file_path] = {
                        "action": action,
                        "version": doc.version,
                        "content": source.read_text(encoding="utf-8"),
                        "hash": doc.content_hash,
                        "size": doc.size_bytes,
                    }

        # Deleted documents
        for file_path in change_set.deleted:
            changes[file_path] = {"action": "delete"}

        # Write package. The JSON keeps non-ASCII characters verbatim, so the
        # encoding must be explicit rather than the platform default.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(
            json.dumps(update_data, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )

        return output_path

    def generate_diff_report(self, change_set: ChangeSet) -> str:
        """
        Generate human-readable diff report.

        The report is built purely from version metadata (sizes, versions,
        hashes); it does not read document content from disk.

        Args:
            change_set: Changes to report

        Returns:
            Formatted report string
        """
        lines = ["=" * 60]
        lines.append("INCREMENTAL UPDATE REPORT")
        lines.append("=" * 60)
        lines.append("")

        # Summary
        lines.append("📊 Summary:")
        lines.append(f" Added: {len(change_set.added)} files")
        lines.append(f" Modified: {len(change_set.modified)} files")
        lines.append(f" Deleted: {len(change_set.deleted)} files")
        lines.append(f" Unchanged: {len(change_set.unchanged)} files")
        lines.append(f" Total changes: {change_set.total_changes}")
        lines.append("")

        # Added files
        if change_set.added:
            lines.append(" Added Files:")
            for doc in change_set.added:
                lines.append(f" + {doc.file_path} ({doc.size_bytes:,} bytes)")
            lines.append("")

        # Modified files
        if change_set.modified:
            lines.append("📝 Modified Files:")
            for doc in change_set.modified:
                prev = self.previous_versions.get(doc.file_path)
                if prev:
                    size_diff = doc.size_bytes - prev.size_bytes
                    size_str = f"{size_diff:+,} bytes" if size_diff != 0 else "same size"
                    lines.append(f" ~ {doc.file_path} (v{prev.version} → v{doc.version}, {size_str})")
                else:
                    lines.append(f" ~ {doc.file_path} (v{doc.version})")
            lines.append("")

        # Deleted files
        if change_set.deleted:
            lines.append("🗑️ Deleted Files:")
            for file_path in change_set.deleted:
                lines.append(f" - {file_path}")
            lines.append("")

        # Per-file before/after details for modified files
        if change_set.modified:
            lines.append("📄 Content Changes:")
            for doc in change_set.modified:
                prev = self.previous_versions.get(doc.file_path)
                if prev:
                    lines.append(f"\n File: {doc.file_path}")
                    lines.append(f" Size: {prev.size_bytes:,} → {doc.size_bytes:,} bytes")
                    lines.append(f" Hash: {prev.content_hash[:8]}... → {doc.content_hash[:8]}...")
            lines.append("")

        lines.append("=" * 60)

        return "\n".join(lines)

    def apply_update_package(self, package_path: Path) -> bool:
        """
        Apply an incremental update package.

        Every path in the package is validated before anything is written:
        absolute paths and paths containing ".." are rejected so a package
        cannot create, overwrite or delete files outside the skill
        directory. If any path is rejected, no change is applied.

        Args:
            package_path: Path to update package

        Returns:
            True if successful, False if the package could not be read,
            was malformed, contained an unsafe path, or a write failed
            (the error is printed)
        """
        try:
            update_data = json.loads(Path(package_path).read_text(encoding="utf-8"))
            changes = update_data["changes"]

            # Package content is external input: reject anything that could
            # escape skill_dir before touching the file system.
            for file_path in changes:
                relative = Path(file_path)
                if relative.anchor or ".." in relative.parts:
                    raise ValueError(f"Unsafe path in update package: {file_path}")

            print("📦 Applying incremental update...")
            print(f" Timestamp: {update_data['metadata']['timestamp']}")
            print(f" Changes: {update_data['metadata']['total_changes']}")

            # Apply changes
            for file_path, change in changes.items():
                action = change["action"]
                full_path = self.skill_dir / file_path

                if action == "add":
                    print(f" Adding: {file_path}")
                    full_path.parent.mkdir(parents=True, exist_ok=True)
                    full_path.write_text(change["content"], encoding="utf-8")
                elif action == "modify":
                    print(f" 📝 Modifying: {file_path}")
                    # The target directory may not exist yet on the receiving side.
                    full_path.parent.mkdir(parents=True, exist_ok=True)
                    full_path.write_text(change["content"], encoding="utf-8")
                elif action == "delete":
                    print(f" 🗑️ Deleting: {file_path}")
                    if full_path.exists():
                        full_path.unlink()
                else:
                    print(f"⚠️ Warning: Skipping unknown action '{action}' for {file_path}")

            print("✅ Update applied successfully!")
            return True
        except Exception as e:
            # Deliberate catch-all: callers rely on a boolean result, not exceptions.
            print(f"❌ Failed to apply update: {e}")
            return False
def main(argv: Optional[List[str]] = None) -> int:
    """
    CLI entry point for incremental updates.

    Modes:
        --apply-update PATH: apply an update package to the skill directory.
        --check-changes: print the change report only.
        default: print the report and, if there are changes, write an update
            package (to --generate-package PATH, or next to the skill
            directory as "<name>-update.json") and save the version snapshot.

    Args:
        argv: Argument list to parse; defaults to the process command line.

    Returns:
        Process exit code. 0 on success; 1 on error. With --check-changes,
        0 means no changes and 1 means changes were detected.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Detect and apply incremental skill updates")
    parser.add_argument("skill_dir", help="Path to skill directory")
    parser.add_argument("--check-changes", action="store_true", help="Check for changes only")
    parser.add_argument("--generate-package", help="Generate update package at specified path")
    parser.add_argument("--apply-update", help="Apply update package from specified path")
    args = parser.parse_args(argv)

    skill_dir = Path(args.skill_dir)
    # is_dir() rather than exists(): a regular file would otherwise be scanned
    # as an empty skill and misreported as "up to date".
    if not skill_dir.is_dir():
        print(f"❌ Error: Directory not found: {skill_dir}")
        return 1

    # Initialize updater
    updater = IncrementalUpdater(skill_dir)

    # Apply update if specified
    if args.apply_update:
        update_path = Path(args.apply_update)
        if not update_path.is_file():
            print(f"❌ Error: Update package not found: {update_path}")
            return 1
        print(f"📥 Applying update from: {update_path}")
        return 0 if updater.apply_update_package(update_path) else 1

    # Detect changes
    print("🔍 Detecting changes...")
    change_set = updater.detect_changes()

    # Generate report
    print(updater.generate_diff_report(change_set))

    if args.check_changes:
        return 1 if change_set.has_changes else 0

    if not change_set.has_changes:
        print("\n✅ No changes detected - skill is up to date!")
        return 0

    # Generate update package at the requested path, or next to the skill directory
    if args.generate_package:
        package_path = Path(args.generate_package)
    else:
        package_path = skill_dir.parent / f"{skill_dir.name}-update.json"

    print("\n📦 Generating update package...")
    package_path = updater.generate_update_package(change_set, package_path)
    print(f"✅ Package created: {package_path}")

    # Save versions so the next run diffs against this state
    updater.save_current_versions()
    print(f"💾 Versions saved to: {updater.version_file}")

    return 0
# Script entry: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())