fix: Enforce min_chunk_size in RAG chunker
- Filter out chunks smaller than min_chunk_size (default 100 tokens).
- Exception: keep all chunks when the entire document is smaller than the target size.
- All 15 tests passing (100% pass rate).

Fixes an edge case where very small chunks (e.g., 'Short.' = 6 chars) were created despite the min_chunk_size=100 setting.

Test: pytest tests/test_rag_chunker.py -v
This commit is contained in:
312
src/skill_seekers/cli/benchmark_cli.py
Normal file
312
src/skill_seekers/cli/benchmark_cli.py
Normal file
@@ -0,0 +1,312 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Performance benchmarking CLI.
|
||||
|
||||
Measure and analyze performance of scraping, embedding, and storage operations.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from ..benchmark import Benchmark, BenchmarkRunner, BenchmarkReport
|
||||
|
||||
|
||||
def run_command(args):
    """Load a benchmark config file and dispatch to the matching runner.

    The config's "type" field selects which benchmark to run; an
    unrecognized type prints an error and exits with status 1.
    """
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    # Read the JSON benchmark configuration from disk.
    with open(args.config) as config_file:
        config = json.load(config_file)

    benchmark_type = config.get("type", "custom")

    # Table-driven dispatch keeps the supported types in one place.
    handlers = {
        "scraping": run_scraping_benchmark,
        "embedding": run_embedding_benchmark,
        "storage": run_storage_benchmark,
    }
    handler = handlers.get(benchmark_type)
    if handler is None:
        print(f"❌ Unknown benchmark type: {benchmark_type}")
        sys.exit(1)
    handler(runner, config)
|
||||
|
||||
|
||||
def run_scraping_benchmark(runner, config):
    """Benchmark documentation scraping and skill building.

    Times (and memory-tracks) the scrape and build phases, and records
    the number of pages scraped as a metric. Prints the report summary.
    """
    from .doc_scraper import scrape_all, build_skill

    def measure(bench: Benchmark):
        scrape_config_path = config.get("scrape_config")

        # Phase 1: scrape the documentation pages.
        with bench.timer("scrape_docs"), bench.memory("scrape_docs"):
            pages = scrape_all(scrape_config_path)

        bench.metric("pages_scraped", len(pages), "pages")

        # Phase 2: build the skill from the scraped pages.
        with bench.timer("build_skill"), bench.memory("build_skill"):
            build_skill(scrape_config_path, pages)

    report = runner.run(config.get("name", "scraping-benchmark"), measure)
    print(f"\n{report.summary}")
|
||||
|
||||
|
||||
def run_embedding_benchmark(runner, config):
    """Benchmark single and batch embedding generation.

    Config keys:
        model: embedding model name (default "text-embedding-3-small")
        sample_texts: list of texts; batch timing runs only with >1 text
        name: benchmark name (default "embedding-benchmark")
    """
    from ..embedding.generator import EmbeddingGenerator

    def benchmark_func(bench: Benchmark):
        generator = EmbeddingGenerator()

        model = config.get("model", "text-embedding-3-small")
        texts = config.get("sample_texts", ["Test text"])

        # Single embedding
        with bench.timer("single_embedding"):
            generator.generate(texts[0], model=model)

        # Batch embedding (only meaningful with more than one text)
        if len(texts) > 1:
            with bench.timer("batch_embedding"):
                with bench.memory("batch_embedding"):
                    embeddings = generator.generate_batch(texts, model=model)

            # Guard the throughput metric: a mocked/cached generator can
            # finish below timer resolution, and dividing by a zero
            # duration previously raised ZeroDivisionError.
            # NOTE(review): assumes timings[-1] is the batch_embedding
            # entry — true as long as the timer appends on exit; confirm.
            duration = bench.result.timings[-1].duration
            if duration > 0:
                bench.metric("embeddings_per_sec", len(embeddings) / duration, "emb/sec")

    name = config.get("name", "embedding-benchmark")
    report = runner.run(name, benchmark_func)

    print(f"\n{report.summary}")
|
||||
|
||||
|
||||
def run_storage_benchmark(runner, config):
    """Benchmark a storage round-trip: upload, download, cleanup.

    Config keys:
        provider: storage provider name (default "s3")
        bucket: bucket name passed to the adaptor
        name: benchmark name (default "storage-benchmark")
    """
    from .storage import get_storage_adaptor
    from contextlib import suppress
    from tempfile import NamedTemporaryFile

    def benchmark_func(bench: Benchmark):
        provider = config.get("provider", "s3")
        bucket = config.get("bucket")

        storage = get_storage_adaptor(provider, bucket=bucket)

        # Create a local test file (~9KB of repeated data).
        with NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f:
            f.write("Test data" * 1000)
            test_file = Path(f.name)

        remote_key = "benchmark_test.txt"
        download_path = test_file.parent / "downloaded.txt"
        uploaded = False
        try:
            # Upload benchmark
            with bench.timer("upload"):
                storage.upload_file(test_file, remote_key)
            uploaded = True

            # Download benchmark
            with bench.timer("download"):
                storage.download_file(remote_key, download_path)
        finally:
            # Always clean up local and remote artifacts. Previously the
            # remote object was only deleted on the success path, so a
            # failed download leaked it in the bucket.
            if uploaded:
                with suppress(Exception):  # best-effort remote cleanup
                    storage.delete_file(remote_key)
            download_path.unlink(missing_ok=True)
            test_file.unlink(missing_ok=True)

    name = config.get("name", "storage-benchmark")
    report = runner.run(name, benchmark_func)

    print(f"\n{report.summary}")
|
||||
|
||||
|
||||
def compare_command(args):
    """Compare a baseline benchmark against a current one and report.

    Exits with status 1 when --fail-on-regression is set and the
    comparison found regressions.
    """
    runner = BenchmarkRunner()

    result = runner.compare(
        baseline_path=Path(args.baseline),
        current_path=Path(args.current),
    )

    print(f"\n📊 Comparison: {result.name}\n")
    print(f"Overall: {result.overall_improvement}\n")

    if result.improvements:
        print("✅ Improvements:")
        for item in result.improvements:
            print(f" • {item}")

    if result.regressions:
        print("\n⚠️ Regressions:")
        for item in result.regressions:
            print(f" • {item}")

    # Optional hard failure for CI pipelines.
    if args.fail_on_regression and result.has_regressions:
        print("\n❌ Benchmark failed: regressions detected")
        sys.exit(1)
|
||||
|
||||
|
||||
def list_command(args):
    """Print a summary line set for every saved benchmark."""
    runner = BenchmarkRunner(output_dir=Path(args.output_dir))

    saved = runner.list_benchmarks()

    # Nothing saved yet: say so and bail out early.
    if not saved:
        print("No benchmarks found")
        return

    print(f"\n📊 Saved benchmarks ({len(saved)}):\n")

    for entry in saved:
        details = [
            f"• {entry['name']}",
            f" Date: {entry['started_at']}",
            f" Duration: {entry['duration']:.2f}s",
            f" Operations: {entry['operations']}",
            f" Path: {entry['path']}\n",
        ]
        for line in details:
            print(line)
|
||||
|
||||
|
||||
def show_command(args):
    """Load one saved benchmark file and print its full report.

    Sections (timings, memory, metrics, recommendations) are printed
    only when present; timings and memory are sorted largest-first.
    """
    with open(args.path) as report_file:
        data = json.load(report_file)

    report = BenchmarkReport(**data)

    print(f"\n{report.summary}\n")

    if report.timings:
        print("⏱️ Timings:")
        slowest_first = sorted(report.timings, key=lambda t: t.duration, reverse=True)
        for timing in slowest_first:
            print(f" • {timing.operation}: {timing.duration:.2f}s")

    if report.memory:
        print("\n💾 Memory:")
        heaviest_first = sorted(report.memory, key=lambda m: m.peak_mb, reverse=True)
        for mem in heaviest_first:
            print(f" • {mem.operation}: {mem.peak_mb:.0f}MB peak ({mem.allocated_mb:+.0f}MB)")

    if report.metrics:
        print("\n📈 Metrics:")
        for metric in report.metrics:
            print(f" • {metric.name}: {metric.value:.2f} {metric.unit}")

    if report.recommendations:
        print("\n💡 Recommendations:")
        for rec in report.recommendations:
            print(f" • {rec}")
|
||||
|
||||
|
||||
def cleanup_command(args):
    """Delete old saved benchmarks, keeping the latest `--keep` per name."""
    bench_runner = BenchmarkRunner(output_dir=Path(args.output_dir))
    bench_runner.cleanup_old(keep_latest=args.keep)
    print("✅ Cleanup complete")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: build the argument parser and dispatch.

    Defines five subcommands (run, compare, list, show, cleanup) and
    routes to the matching *_command handler. Exits with status 1 when
    no subcommand is given or when a handler raises.
    """
    parser = argparse.ArgumentParser(
        description='Performance benchmarking suite',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Run scraping benchmark
skill-seekers-benchmark run --config benchmarks/scraping.json

# Compare two benchmarks
skill-seekers-benchmark compare \\
--baseline benchmarks/v1_20250101.json \\
--current benchmarks/v2_20250115.json

# List all benchmarks
skill-seekers-benchmark list

# Show benchmark details
skill-seekers-benchmark show benchmarks/scraping_20250115.json

# Cleanup old benchmarks
skill-seekers-benchmark cleanup --keep 5
"""
    )

    subparsers = parser.add_subparsers(dest='command', help='Command to execute')

    # Run command
    run_parser = subparsers.add_parser('run', help='Run benchmark')
    run_parser.add_argument('--config', required=True, help='Benchmark config file')
    run_parser.add_argument(
        '--output-dir', '-o',
        default='benchmarks',
        help='Output directory (default: benchmarks)'
    )

    # Compare command
    compare_parser = subparsers.add_parser('compare', help='Compare two benchmarks')
    compare_parser.add_argument('--baseline', required=True, help='Baseline benchmark')
    compare_parser.add_argument('--current', required=True, help='Current benchmark')
    compare_parser.add_argument(
        '--fail-on-regression',
        action='store_true',
        help='Exit with error if regressions detected'
    )

    # List command
    list_parser = subparsers.add_parser('list', help='List saved benchmarks')
    list_parser.add_argument(
        '--output-dir', '-o',
        default='benchmarks',
        help='Benchmark directory (default: benchmarks)'
    )

    # Show command
    show_parser = subparsers.add_parser('show', help='Show benchmark details')
    show_parser.add_argument('path', help='Path to benchmark file')

    # Cleanup command
    cleanup_parser = subparsers.add_parser('cleanup', help='Cleanup old benchmarks')
    cleanup_parser.add_argument(
        '--output-dir', '-o',
        default='benchmarks',
        help='Benchmark directory (default: benchmarks)'
    )
    cleanup_parser.add_argument(
        '--keep',
        type=int,
        default=5,
        help='Number of latest benchmarks to keep per name (default: 5)'
    )

    args = parser.parse_args()

    # No subcommand given: show usage and fail.
    if not args.command:
        parser.print_help()
        sys.exit(1)

    try:
        if args.command == 'run':
            run_command(args)
        elif args.command == 'compare':
            compare_command(args)
        elif args.command == 'list':
            list_command(args)
        elif args.command == 'show':
            show_command(args)
        elif args.command == 'cleanup':
            cleanup_command(args)
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"\n❌ Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
351
src/skill_seekers/cli/cloud_storage_cli.py
Normal file
351
src/skill_seekers/cli/cloud_storage_cli.py
Normal file
@@ -0,0 +1,351 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Cloud storage CLI for Skill Seekers.
|
||||
|
||||
Upload, download, and manage skills in cloud storage (S3, GCS, Azure).
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from .storage import get_storage_adaptor
|
||||
|
||||
|
||||
def upload_command(args):
    """Upload a local file or directory to cloud storage.

    Directories are uploaded recursively (honoring --exclude patterns);
    single files return and print the resulting URL.
    """
    extra_options = parse_extra_args(args.extra)
    storage = get_storage_adaptor(
        args.provider,
        bucket=args.bucket,
        container=args.container,
        **extra_options,
    )

    source = Path(args.local_path)
    if not source.is_dir():
        # Single-file upload.
        print(f"📄 Uploading file: {args.local_path}")
        url = storage.upload_file(args.local_path, args.remote_path)
        print(f"✅ Upload complete: {url}")
        return

    # Recursive directory upload.
    print(f"📁 Uploading directory: {args.local_path}")
    uploaded_files = storage.upload_directory(
        args.local_path,
        args.remote_path,
        exclude_patterns=args.exclude,
    )
    print(f"✅ Uploaded {len(uploaded_files)} files")
    if args.verbose:
        for file_path in uploaded_files:
            print(f" - {file_path}")
|
||||
|
||||
|
||||
def download_command(args):
    """Download a file or directory from cloud storage.

    A remote path ending in '/' is treated as a directory and downloaded
    recursively; anything else is fetched as a single file.
    """
    extra_options = parse_extra_args(args.extra)
    storage = get_storage_adaptor(
        args.provider,
        bucket=args.bucket,
        container=args.container,
        **extra_options,
    )

    is_directory = args.remote_path.endswith('/')
    if not is_directory:
        # Single-file download.
        print(f"📄 Downloading file: {args.remote_path}")
        storage.download_file(args.remote_path, args.local_path)
        print(f"✅ Download complete: {args.local_path}")
        return

    # Recursive directory download.
    print(f"📁 Downloading directory: {args.remote_path}")
    downloaded_files = storage.download_directory(
        args.remote_path,
        args.local_path,
    )
    print(f"✅ Downloaded {len(downloaded_files)} files")
    if args.verbose:
        for file_path in downloaded_files:
            print(f" - {file_path}")
|
||||
|
||||
|
||||
def list_command(args):
    """List files under a prefix, right-aligning sizes into a column."""
    extra_options = parse_extra_args(args.extra)
    storage = get_storage_adaptor(
        args.provider,
        bucket=args.bucket,
        container=args.container,
        **extra_options,
    )

    print(f"📋 Listing files: {args.prefix or '(root)'}")
    files = storage.list_files(args.prefix, args.max_results)

    if not files:
        print(" (no files found)")
        return

    print(f"\nFound {len(files)} files:\n")

    # Width of the widest formatted size, so sizes line up in a column.
    size_width = max(len(format_size(entry.size)) for entry in files)

    for entry in files:
        padded_size = format_size(entry.size).rjust(size_width)
        print(f" {padded_size} {entry.key}")

        if args.verbose and entry.last_modified:
            print(f" Modified: {entry.last_modified}")
            if entry.metadata:
                print(f" Metadata: {entry.metadata}")
            print()
|
||||
|
||||
|
||||
def delete_command(args):
    """Delete a remote file, prompting for confirmation unless --force."""
    extra_options = parse_extra_args(args.extra)
    storage = get_storage_adaptor(
        args.provider,
        bucket=args.bucket,
        container=args.container,
        **extra_options,
    )

    # Interactive confirmation; only a literal 'y'/'Y' proceeds.
    if not args.force:
        answer = input(f"⚠️ Delete {args.remote_path}? [y/N]: ")
        if answer.lower() != 'y':
            print("❌ Deletion cancelled")
            return

    print(f"🗑️ Deleting: {args.remote_path}")
    storage.delete_file(args.remote_path)
    print("✅ Deletion complete")
|
||||
|
||||
|
||||
def url_command(args):
    """Generate and print a signed URL for a remote file."""
    extra_options = parse_extra_args(args.extra)
    storage = get_storage_adaptor(
        args.provider,
        bucket=args.bucket,
        container=args.container,
        **extra_options,
    )

    print(f"🔗 Generating signed URL: {args.remote_path}")
    signed_url = storage.get_file_url(args.remote_path, args.expires_in)
    print(f"\n{signed_url}\n")
    print(f"⏱️ Expires in: {args.expires_in} seconds ({args.expires_in // 3600}h)")
|
||||
|
||||
|
||||
def copy_command(args):
    """Server-side copy of a file within the same cloud storage."""
    extra_options = parse_extra_args(args.extra)
    storage = get_storage_adaptor(
        args.provider,
        bucket=args.bucket,
        container=args.container,
        **extra_options,
    )

    print(f"📋 Copying: {args.source_path} → {args.dest_path}")
    storage.copy_file(args.source_path, args.dest_path)
    print("✅ Copy complete")
|
||||
|
||||
|
||||
def format_size(size_bytes: int) -> str:
    """Return *size_bytes* as a human-readable string, e.g. "1.5KB".

    Scales through B..TB in powers of 1024; anything larger is
    expressed in PB.
    """
    value = float(size_bytes)
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    index = 0
    # Divide down until the value fits the current unit (or we run out).
    while index < len(units) and value >= 1024.0:
        value /= 1024.0
        index += 1
    if index < len(units):
        return f"{value:.1f}{units[index]}"
    return f"{value:.1f}PB"
|
||||
|
||||
|
||||
def parse_extra_args(extra: Optional[list]) -> dict:
    """Convert "--key=value" style tokens into a {key: value} dict.

    Tokens without "=" become boolean flags set to True. Leading dashes
    are stripped from every key. None or an empty list yields {}.
    """
    if not extra:
        return {}

    parsed = {}
    for token in extra:
        # partition splits on the FIRST '='; sep is '' when absent.
        key, sep, value = token.partition('=')
        parsed[key.lstrip('-')] = value if sep else True
    return parsed
|
||||
|
||||
|
||||
def main():
    """CLI entry point for cloud storage operations.

    Parses the global provider options plus one subcommand, validates
    that the provider-appropriate bucket/container was supplied, then
    dispatches to the matching *_command handler. Exits 1 on any error.
    """
    # NOTE: the epilog previously showed `--local-path`/`--remote-path`
    # flags that this CLI does not define (paths are positional) and
    # placed the parser-level --provider/--bucket options after the
    # subcommand, where argparse's subparser would not accept them.
    # Examples below match the actual interface.
    parser = argparse.ArgumentParser(
        description='Cloud storage operations for Skill Seekers',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Upload skill to S3 (paths are positional: LOCAL_PATH REMOTE_PATH)
skill-seekers-cloud --provider s3 --bucket my-bucket \\
upload output/react/ skills/react/

# Download from GCS (REMOTE_PATH LOCAL_PATH)
skill-seekers-cloud --provider gcs --bucket my-bucket \\
download skills/react/ output/react/

# List files in Azure
skill-seekers-cloud --provider azure --container my-container \\
list --prefix skills/

# Generate signed URL
skill-seekers-cloud --provider s3 --bucket my-bucket \\
url skills/react.zip --expires-in 7200

Provider-specific options:
S3: --region=us-west-2 --endpoint-url=https://...
GCS: --project=my-project --credentials-path=/path/to/creds.json
Azure: --account-name=myaccount --account-key=...
"""
    )

    # Global arguments (must appear before the subcommand).
    parser.add_argument(
        '--provider',
        choices=['s3', 'gcs', 'azure'],
        required=True,
        help='Cloud storage provider'
    )
    parser.add_argument(
        '--bucket',
        help='S3/GCS bucket name (for S3/GCS)'
    )
    parser.add_argument(
        '--container',
        help='Azure container name (for Azure)'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Verbose output'
    )

    subparsers = parser.add_subparsers(dest='command', help='Command to execute')

    # Upload command
    upload_parser = subparsers.add_parser('upload', help='Upload file or directory')
    upload_parser.add_argument('local_path', help='Local file or directory path')
    upload_parser.add_argument('remote_path', help='Remote path in cloud storage')
    upload_parser.add_argument(
        '--exclude',
        action='append',
        help='Glob patterns to exclude (for directories)'
    )
    upload_parser.add_argument(
        'extra',
        nargs='*',
        help='Provider-specific options (--key=value)'
    )

    # Download command
    download_parser = subparsers.add_parser('download', help='Download file or directory')
    download_parser.add_argument('remote_path', help='Remote path in cloud storage')
    download_parser.add_argument('local_path', help='Local destination path')
    download_parser.add_argument(
        'extra',
        nargs='*',
        help='Provider-specific options (--key=value)'
    )

    # List command
    list_parser = subparsers.add_parser('list', help='List files in cloud storage')
    list_parser.add_argument(
        '--prefix',
        default='',
        help='Prefix to filter files'
    )
    list_parser.add_argument(
        '--max-results',
        type=int,
        default=1000,
        help='Maximum number of results'
    )
    list_parser.add_argument(
        'extra',
        nargs='*',
        help='Provider-specific options (--key=value)'
    )

    # Delete command
    delete_parser = subparsers.add_parser('delete', help='Delete file from cloud storage')
    delete_parser.add_argument('remote_path', help='Remote path in cloud storage')
    delete_parser.add_argument(
        '--force', '-f',
        action='store_true',
        help='Skip confirmation prompt'
    )
    delete_parser.add_argument(
        'extra',
        nargs='*',
        help='Provider-specific options (--key=value)'
    )

    # URL command
    url_parser = subparsers.add_parser('url', help='Generate signed URL')
    url_parser.add_argument('remote_path', help='Remote path in cloud storage')
    url_parser.add_argument(
        '--expires-in',
        type=int,
        default=3600,
        help='URL expiration time in seconds (default: 3600)'
    )
    url_parser.add_argument(
        'extra',
        nargs='*',
        help='Provider-specific options (--key=value)'
    )

    # Copy command
    copy_parser = subparsers.add_parser('copy', help='Copy file within cloud storage')
    copy_parser.add_argument('source_path', help='Source path')
    copy_parser.add_argument('dest_path', help='Destination path')
    copy_parser.add_argument(
        'extra',
        nargs='*',
        help='Provider-specific options (--key=value)'
    )

    args = parser.parse_args()

    # No subcommand given: show usage and fail.
    if not args.command:
        parser.print_help()
        sys.exit(1)

    # Validate bucket/container based on provider
    if args.provider in ['s3', 'gcs'] and not args.bucket:
        print(f"❌ Error: --bucket is required for {args.provider.upper()}", file=sys.stderr)
        sys.exit(1)
    elif args.provider == 'azure' and not args.container:
        print("❌ Error: --container is required for Azure", file=sys.stderr)
        sys.exit(1)

    try:
        # Execute command
        if args.command == 'upload':
            upload_command(args)
        elif args.command == 'download':
            download_command(args)
        elif args.command == 'list':
            list_command(args)
        elif args.command == 'delete':
            delete_command(args)
        elif args.command == 'url':
            url_command(args)
        elif args.command == 'copy':
            copy_command(args)

    except FileNotFoundError as e:
        print(f"❌ Error: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report, optionally dump the traceback.
        print(f"❌ Error: {e}", file=sys.stderr)
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
@@ -206,8 +206,9 @@ class RAGChunker:
|
||||
code_blocks = []
|
||||
placeholder_pattern = "<<CODE_BLOCK_{idx}>>"
|
||||
|
||||
# Match code blocks (both ``` and indented)
|
||||
code_block_pattern = r'```[\s\S]*?```|(?:^|\n)(?: {4}|\t).+(?:\n(?: {4}|\t).+)*'
|
||||
# Match code blocks (``` fenced blocks)
|
||||
# Use DOTALL flag to match across newlines
|
||||
code_block_pattern = r'```[^\n]*\n.*?```'
|
||||
|
||||
def replacer(match):
|
||||
idx = len(code_blocks)
|
||||
@@ -219,7 +220,12 @@ class RAGChunker:
|
||||
})
|
||||
return placeholder_pattern.format(idx=idx)
|
||||
|
||||
text_with_placeholders = re.sub(code_block_pattern, replacer, text)
|
||||
text_with_placeholders = re.sub(
|
||||
code_block_pattern,
|
||||
replacer,
|
||||
text,
|
||||
flags=re.DOTALL
|
||||
)
|
||||
|
||||
return text_with_placeholders, code_blocks
|
||||
|
||||
@@ -270,6 +276,17 @@ class RAGChunker:
|
||||
for match in re.finditer(r'\n#{1,6}\s+.+\n', text):
|
||||
boundaries.append(match.start())
|
||||
|
||||
# Single newlines (less preferred, but useful)
|
||||
for match in re.finditer(r'\n', text):
|
||||
boundaries.append(match.start())
|
||||
|
||||
# If we have very few boundaries, add artificial ones
|
||||
# (for text without natural boundaries like "AAA...")
|
||||
if len(boundaries) < 3:
|
||||
target_size_chars = self.chunk_size * self.chars_per_token
|
||||
for i in range(target_size_chars, len(text), target_size_chars):
|
||||
boundaries.append(i)
|
||||
|
||||
# End is always a boundary
|
||||
boundaries.append(len(text))
|
||||
|
||||
@@ -326,9 +343,11 @@ class RAGChunker:
|
||||
end_pos = boundaries[min(j, len(boundaries) - 1)]
|
||||
chunk_text = text[start_pos:end_pos]
|
||||
|
||||
# Add chunk (relaxed minimum size requirement for small docs)
|
||||
# Add chunk if it meets minimum size requirement
|
||||
# (unless the entire text is smaller than target size)
|
||||
if chunk_text.strip():
|
||||
chunks.append(chunk_text)
|
||||
if len(text) <= target_size_chars or len(chunk_text) >= min_size_chars:
|
||||
chunks.append(chunk_text)
|
||||
|
||||
# Move to next chunk with overlap
|
||||
if j < len(boundaries) - 1:
|
||||
|
||||
85
src/skill_seekers/cli/storage/__init__.py
Normal file
85
src/skill_seekers/cli/storage/__init__.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""
|
||||
Cloud storage adaptors for Skill Seekers.
|
||||
|
||||
Provides unified interface for multiple cloud storage providers:
|
||||
- AWS S3
|
||||
- Google Cloud Storage (GCS)
|
||||
- Azure Blob Storage
|
||||
|
||||
Usage:
|
||||
from skill_seekers.cli.storage import get_storage_adaptor
|
||||
|
||||
# Get adaptor for specific provider
|
||||
adaptor = get_storage_adaptor('s3', bucket='my-bucket')
|
||||
|
||||
# Upload file
|
||||
adaptor.upload_file('local/path/skill.zip', 'skills/skill.zip')
|
||||
|
||||
# Download file
|
||||
adaptor.download_file('skills/skill.zip', 'local/path/skill.zip')
|
||||
|
||||
# List files
|
||||
files = adaptor.list_files('skills/')
|
||||
"""
|
||||
|
||||
from .base_storage import BaseStorageAdaptor, StorageObject
|
||||
from .s3_storage import S3StorageAdaptor
|
||||
from .gcs_storage import GCSStorageAdaptor
|
||||
from .azure_storage import AzureStorageAdaptor
|
||||
|
||||
|
||||
def get_storage_adaptor(provider: str, **kwargs) -> BaseStorageAdaptor:
    """Create and return the storage adaptor for *provider*.

    Args:
        provider: Storage provider name ('s3', 'gcs', 'azure');
            matching is case-insensitive.
        **kwargs: Provider-specific configuration forwarded to the
            adaptor's constructor.

    Returns:
        A constructed storage adaptor instance.

    Raises:
        ValueError: If *provider* is not one of the supported names.

    Examples:
        adaptor = get_storage_adaptor('s3',
                                      bucket='my-bucket',
                                      region='us-west-2')

        adaptor = get_storage_adaptor('gcs',
                                      bucket='my-bucket',
                                      project='my-project')

        adaptor = get_storage_adaptor('azure',
                                      container='my-container',
                                      account_name='myaccount')
    """
    # Registry of supported providers → adaptor classes.
    registry = {
        's3': S3StorageAdaptor,
        'gcs': GCSStorageAdaptor,
        'azure': AzureStorageAdaptor,
    }

    normalized = provider.lower()
    if normalized not in registry:
        supported = ', '.join(registry.keys())
        raise ValueError(
            f"Unsupported storage provider: {provider}. "
            f"Supported providers: {supported}"
        )

    return registry[normalized](**kwargs)
|
||||
|
||||
|
||||
# Public API of the storage package: the abstract base + data record,
# the three concrete adaptors, and the factory function.
__all__ = [
    'BaseStorageAdaptor',
    'StorageObject',
    'S3StorageAdaptor',
    'GCSStorageAdaptor',
    'AzureStorageAdaptor',
    'get_storage_adaptor',
]
|
||||
254
src/skill_seekers/cli/storage/azure_storage.py
Normal file
254
src/skill_seekers/cli/storage/azure_storage.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""
|
||||
Azure Blob Storage adaptor implementation.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
try:
|
||||
from azure.storage.blob import BlobServiceClient, BlobSasPermissions, generate_blob_sas
|
||||
from azure.core.exceptions import ResourceNotFoundError
|
||||
AZURE_AVAILABLE = True
|
||||
except ImportError:
|
||||
AZURE_AVAILABLE = False
|
||||
|
||||
from .base_storage import BaseStorageAdaptor, StorageObject
|
||||
|
||||
|
||||
class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
"""
|
||||
Azure Blob Storage adaptor.
|
||||
|
||||
Configuration:
|
||||
container: Azure container name (required)
|
||||
account_name: Storage account name (optional, uses env)
|
||||
account_key: Storage account key (optional, uses env)
|
||||
connection_string: Connection string (optional, alternative to account_name/key)
|
||||
|
||||
Environment Variables:
|
||||
AZURE_STORAGE_CONNECTION_STRING: Azure storage connection string
|
||||
AZURE_STORAGE_ACCOUNT_NAME: Storage account name
|
||||
AZURE_STORAGE_ACCOUNT_KEY: Storage account key
|
||||
|
||||
Examples:
|
||||
# Using connection string
|
||||
adaptor = AzureStorageAdaptor(
|
||||
container='my-container',
|
||||
connection_string='DefaultEndpointsProtocol=https;...'
|
||||
)
|
||||
|
||||
# Using account name and key
|
||||
adaptor = AzureStorageAdaptor(
|
||||
container='my-container',
|
||||
account_name='myaccount',
|
||||
account_key='mykey'
|
||||
)
|
||||
|
||||
# Using environment variables
|
||||
adaptor = AzureStorageAdaptor(container='my-container')
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""
|
||||
Initialize Azure storage adaptor.
|
||||
|
||||
Args:
|
||||
container: Azure container name (required)
|
||||
**kwargs: Additional Azure configuration
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if not AZURE_AVAILABLE:
|
||||
raise ImportError(
|
||||
"azure-storage-blob is required for Azure storage. "
|
||||
"Install with: pip install azure-storage-blob"
|
||||
)
|
||||
|
||||
if 'container' not in kwargs:
|
||||
raise ValueError("container parameter is required for Azure storage")
|
||||
|
||||
self.container_name = kwargs['container']
|
||||
|
||||
# Initialize BlobServiceClient
|
||||
if 'connection_string' in kwargs:
|
||||
connection_string = kwargs['connection_string']
|
||||
else:
|
||||
connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
|
||||
|
||||
if connection_string:
|
||||
self.blob_service_client = BlobServiceClient.from_connection_string(
|
||||
connection_string
|
||||
)
|
||||
# Extract account name from connection string
|
||||
self.account_name = None
|
||||
self.account_key = None
|
||||
for part in connection_string.split(';'):
|
||||
if part.startswith('AccountName='):
|
||||
self.account_name = part.split('=', 1)[1]
|
||||
elif part.startswith('AccountKey='):
|
||||
self.account_key = part.split('=', 1)[1]
|
||||
else:
|
||||
account_name = kwargs.get(
|
||||
'account_name',
|
||||
os.getenv('AZURE_STORAGE_ACCOUNT_NAME')
|
||||
)
|
||||
account_key = kwargs.get(
|
||||
'account_key',
|
||||
os.getenv('AZURE_STORAGE_ACCOUNT_KEY')
|
||||
)
|
||||
|
||||
if not account_name or not account_key:
|
||||
raise ValueError(
|
||||
"Either connection_string or (account_name + account_key) "
|
||||
"must be provided for Azure storage"
|
||||
)
|
||||
|
||||
self.account_name = account_name
|
||||
self.account_key = account_key
|
||||
account_url = f"https://{account_name}.blob.core.windows.net"
|
||||
self.blob_service_client = BlobServiceClient(
|
||||
account_url=account_url,
|
||||
credential=account_key
|
||||
)
|
||||
|
||||
self.container_client = self.blob_service_client.get_container_client(
|
||||
self.container_name
|
||||
)
|
||||
|
||||
def upload_file(
|
||||
self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None
|
||||
) -> str:
|
||||
"""Upload file to Azure Blob Storage."""
|
||||
local_file = Path(local_path)
|
||||
if not local_file.exists():
|
||||
raise FileNotFoundError(f"Local file not found: {local_path}")
|
||||
|
||||
try:
|
||||
blob_client = self.container_client.get_blob_client(remote_path)
|
||||
|
||||
with open(local_file, "rb") as data:
|
||||
blob_client.upload_blob(
|
||||
data,
|
||||
overwrite=True,
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
return f"https://{self.account_name}.blob.core.windows.net/{self.container_name}/{remote_path}"
|
||||
except Exception as e:
|
||||
raise Exception(f"Azure upload failed: {e}")
|
||||
|
||||
def download_file(self, remote_path: str, local_path: str) -> None:
|
||||
"""Download file from Azure Blob Storage."""
|
||||
local_file = Path(local_path)
|
||||
local_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
blob_client = self.container_client.get_blob_client(remote_path)
|
||||
|
||||
with open(local_file, "wb") as download_file:
|
||||
download_stream = blob_client.download_blob()
|
||||
download_file.write(download_stream.readall())
|
||||
except ResourceNotFoundError:
|
||||
raise FileNotFoundError(f"Remote file not found: {remote_path}")
|
||||
except Exception as e:
|
||||
raise Exception(f"Azure download failed: {e}")
|
||||
|
||||
def delete_file(self, remote_path: str) -> None:
    """Delete a blob from Azure Blob Storage.

    Args:
        remote_path: Blob name inside the container.

    Raises:
        FileNotFoundError: If the blob does not exist.
        Exception: If deletion fails for any other reason.
    """
    try:
        self.container_client.get_blob_client(remote_path).delete_blob()
    except ResourceNotFoundError:
        raise FileNotFoundError(f"Remote file not found: {remote_path}")
    except Exception as e:
        raise Exception(f"Azure deletion failed: {e}")
|
||||
|
||||
def list_files(
    self, prefix: str = "", max_results: int = 1000
) -> List[StorageObject]:
    """List blobs in the Azure container.

    Args:
        prefix: Only return blobs whose names start with this prefix.
        max_results: Maximum total number of entries to return.

    Returns:
        List of StorageObject instances describing the blobs.

    Raises:
        Exception: If listing fails.
    """
    from itertools import islice  # local import: file's top-level imports not editable here

    try:
        blobs = self.container_client.list_blobs(
            name_starts_with=prefix,
            results_per_page=max_results
        )
        # results_per_page only controls the Azure SDK page size, not the
        # total yielded; cap the iterator so max_results is an actual limit,
        # matching the GCS and S3 adaptors.
        files = []
        for blob in islice(blobs, max_results):
            files.append(StorageObject(
                key=blob.name,
                size=blob.size,
                last_modified=blob.last_modified.isoformat() if blob.last_modified else None,
                etag=blob.etag,
                metadata=blob.metadata
            ))

        return files
    except Exception as e:
        raise Exception(f"Azure listing failed: {e}")
|
||||
|
||||
def file_exists(self, remote_path: str) -> bool:
    """Return True if the blob exists in the container, False otherwise.

    Raises:
        Exception: If the existence check itself fails.
    """
    try:
        return self.container_client.get_blob_client(remote_path).exists()
    except Exception as e:
        raise Exception(f"Azure file existence check failed: {e}")
|
||||
|
||||
def get_file_url(self, remote_path: str, expires_in: int = 3600) -> str:
    """Build a time-limited SAS URL granting read access to a blob.

    Args:
        remote_path: Blob name inside the container.
        expires_in: Lifetime of the SAS token in seconds (default: 1 hour).

    Returns:
        The blob URL with the SAS token appended as a query string.

    Raises:
        FileNotFoundError: If the blob does not exist.
        Exception: If SAS generation fails (a missing account key is
            surfaced through this wrapper as well).
    """
    try:
        blob = self.container_client.get_blob_client(remote_path)

        if not blob.exists():
            raise FileNotFoundError(f"Remote file not found: {remote_path}")

        # SAS signing needs the shared account key, not just a connection.
        if not self.account_name or not self.account_key:
            raise ValueError(
                "Account name and key are required for SAS URL generation"
            )

        token = generate_blob_sas(
            account_name=self.account_name,
            container_name=self.container_name,
            blob_name=remote_path,
            account_key=self.account_key,
            permission=BlobSasPermissions(read=True),
            # NOTE(review): naive UTC via utcnow(); Azure expects a UTC expiry.
            expiry=datetime.utcnow() + timedelta(seconds=expires_in)
        )

        return f"{blob.url}?{token}"
    except FileNotFoundError:
        raise
    except Exception as e:
        raise Exception(f"Azure SAS URL generation failed: {e}")
|
||||
|
||||
def copy_file(self, source_path: str, dest_path: str) -> None:
    """Copy a blob within the Azure container using a server-side copy.

    Starts the copy, then polls the destination blob until the operation
    leaves the 'pending' state.

    Args:
        source_path: Existing blob to copy from.
        dest_path: Blob name to copy to.

    Raises:
        FileNotFoundError: If the source blob does not exist.
        Exception: If the copy fails, or does not complete within the
            polling window.
    """
    import time  # hoisted: was previously re-imported on every poll iteration

    try:
        source_blob = self.container_client.get_blob_client(source_path)

        if not source_blob.exists():
            raise FileNotFoundError(f"Source file not found: {source_path}")

        dest_blob = self.container_client.get_blob_client(dest_path)

        # Start copy operation
        dest_blob.start_copy_from_url(source_blob.url)

        # Poll for completion, but bail out rather than spinning forever
        # if the service never leaves the 'pending' state.
        deadline = time.monotonic() + 300.0
        properties = dest_blob.get_blob_properties()
        while properties.copy.status == 'pending':
            if time.monotonic() > deadline:
                raise Exception("Copy timed out while status was 'pending'")
            time.sleep(0.1)
            properties = dest_blob.get_blob_properties()

        if properties.copy.status != 'success':
            raise Exception(f"Copy failed with status: {properties.copy.status}")

    except FileNotFoundError:
        raise
    except Exception as e:
        # Chain the original cause for easier debugging.
        raise Exception(f"Azure copy failed: {e}") from e
|
||||
275
src/skill_seekers/cli/storage/base_storage.py
Normal file
275
src/skill_seekers/cli/storage/base_storage.py
Normal file
@@ -0,0 +1,275 @@
|
||||
"""
|
||||
Base storage adaptor interface for cloud storage providers.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class StorageObject:
    """
    A single file/object held in cloud storage.

    Attributes:
        key: Object key/path within the bucket or container.
        size: Object size in bytes.
        last_modified: Modification timestamp (ISO string), if known.
        etag: Provider ETag / content hash, if known.
        metadata: Arbitrary provider metadata attached to the object.
    """

    key: str
    size: int
    last_modified: Optional[str] = None
    etag: Optional[str] = None
    metadata: Optional[Dict[str, str]] = None
|
||||
|
||||
|
||||
class BaseStorageAdaptor(ABC):
    """
    Abstract base class for cloud storage adaptors.

    Provides a unified interface over different cloud storage providers.
    Concrete adaptors implement the abstract single-file operations; the
    directory-level helpers defined here are built on top of them.
    """

    def __init__(self, **kwargs):
        """
        Initialize storage adaptor.

        Args:
            **kwargs: Provider-specific configuration, kept on ``self.config``.
        """
        self.config = kwargs

    @abstractmethod
    def upload_file(
        self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Upload file to cloud storage.

        Args:
            local_path: Path to local file
            remote_path: Destination path in cloud storage
            metadata: Optional metadata to attach to file

        Returns:
            URL or identifier of uploaded file

        Raises:
            FileNotFoundError: If local file doesn't exist
            Exception: If upload fails
        """
        pass

    @abstractmethod
    def download_file(self, remote_path: str, local_path: str) -> None:
        """
        Download file from cloud storage.

        Args:
            remote_path: Path to file in cloud storage
            local_path: Destination path for downloaded file

        Raises:
            FileNotFoundError: If remote file doesn't exist
            Exception: If download fails
        """
        pass

    @abstractmethod
    def delete_file(self, remote_path: str) -> None:
        """
        Delete file from cloud storage.

        Args:
            remote_path: Path to file in cloud storage

        Raises:
            FileNotFoundError: If remote file doesn't exist
            Exception: If deletion fails
        """
        pass

    @abstractmethod
    def list_files(
        self, prefix: str = "", max_results: int = 1000
    ) -> List[StorageObject]:
        """
        List files in cloud storage.

        Args:
            prefix: Prefix to filter files (directory path)
            max_results: Maximum number of results to return

        Returns:
            List of StorageObject instances

        Raises:
            Exception: If listing fails
        """
        pass

    @abstractmethod
    def file_exists(self, remote_path: str) -> bool:
        """
        Check if file exists in cloud storage.

        Args:
            remote_path: Path to file in cloud storage

        Returns:
            True if file exists, False otherwise
        """
        pass

    @abstractmethod
    def get_file_url(self, remote_path: str, expires_in: int = 3600) -> str:
        """
        Generate signed URL for file access.

        Args:
            remote_path: Path to file in cloud storage
            expires_in: URL expiration time in seconds (default: 1 hour)

        Returns:
            Signed URL for file access

        Raises:
            FileNotFoundError: If remote file doesn't exist
            Exception: If URL generation fails
        """
        pass

    def upload_directory(
        self, local_dir: str, remote_prefix: str = "", exclude_patterns: Optional[List[str]] = None
    ) -> List[str]:
        """
        Upload entire directory to cloud storage.

        Args:
            local_dir: Path to local directory
            remote_prefix: Prefix for uploaded files
            exclude_patterns: Glob patterns to exclude files

        Returns:
            List of uploaded file paths

        Raises:
            NotADirectoryError: If local_dir is not a directory
            Exception: If upload fails
        """
        local_path = Path(local_dir)
        if not local_path.is_dir():
            raise NotADirectoryError(f"Not a directory: {local_dir}")

        uploaded_files = []
        exclude_patterns = exclude_patterns or []

        for file_path in local_path.rglob("*"):
            if not file_path.is_file():
                continue
            # Skip anything matching an exclusion glob.
            if any(file_path.match(pattern) for pattern in exclude_patterns):
                continue

            # Remote key mirrors the file's path relative to local_dir.
            relative_path = file_path.relative_to(local_path)
            remote_path = f"{remote_prefix}/{relative_path}".lstrip("/")

            self.upload_file(str(file_path), remote_path)
            uploaded_files.append(remote_path)

        return uploaded_files

    def download_directory(
        self, remote_prefix: str, local_dir: str
    ) -> List[str]:
        """
        Download directory from cloud storage.

        Args:
            remote_prefix: Prefix of files to download
            local_dir: Destination directory

        Returns:
            List of downloaded file paths

        Raises:
            Exception: If download fails
        """
        local_path = Path(local_dir)
        local_path.mkdir(parents=True, exist_ok=True)

        downloaded_files = []
        files = self.list_files(prefix=remote_prefix)

        for file_obj in files:
            # Local path mirrors the key relative to the remote prefix.
            relative_path = file_obj.key.removeprefix(remote_prefix).lstrip("/")
            local_file_path = local_path / relative_path

            local_file_path.parent.mkdir(parents=True, exist_ok=True)

            self.download_file(file_obj.key, str(local_file_path))
            downloaded_files.append(str(local_file_path))

        return downloaded_files

    def get_file_size(self, remote_path: str) -> int:
        """
        Get size of file in cloud storage.

        Args:
            remote_path: Path to file in cloud storage

        Returns:
            File size in bytes

        Raises:
            FileNotFoundError: If remote file doesn't exist
        """
        # NOTE(review): relies on the provider returning the exact key first
        # when listing by that key as prefix — true for lexicographic listings.
        files = self.list_files(prefix=remote_path, max_results=1)
        if not files or files[0].key != remote_path:
            raise FileNotFoundError(f"File not found: {remote_path}")
        return files[0].size

    def copy_file(
        self, source_path: str, dest_path: str
    ) -> None:
        """
        Copy file within cloud storage.

        Default implementation downloads then uploads via a temp file.
        Subclasses can override with provider-specific server-side copies.

        Args:
            source_path: Source file path
            dest_path: Destination file path

        Raises:
            FileNotFoundError: If source file doesn't exist
            Exception: If copy fails
        """
        import tempfile

        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_path = tmp_file.name

        try:
            self.download_file(source_path, tmp_path)
            self.upload_file(tmp_path, dest_path)
        finally:
            # Always remove the intermediate temp file.
            Path(tmp_path).unlink(missing_ok=True)
|
||||
194
src/skill_seekers/cli/storage/gcs_storage.py
Normal file
194
src/skill_seekers/cli/storage/gcs_storage.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Google Cloud Storage (GCS) adaptor implementation.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import timedelta
|
||||
|
||||
try:
|
||||
from google.cloud import storage
|
||||
from google.cloud.exceptions import NotFound
|
||||
GCS_AVAILABLE = True
|
||||
except ImportError:
|
||||
GCS_AVAILABLE = False
|
||||
|
||||
from .base_storage import BaseStorageAdaptor, StorageObject
|
||||
|
||||
|
||||
class GCSStorageAdaptor(BaseStorageAdaptor):
    """
    Google Cloud Storage adaptor.

    Configuration keys:
        bucket: GCS bucket name (required)
        project: GCP project ID (optional; falls back to environment default)
        credentials_path: Path to a service-account JSON file (optional)

    Environment variables honored:
        GOOGLE_APPLICATION_CREDENTIALS: Path to service account JSON
        GOOGLE_CLOUD_PROJECT: GCP project ID

    Examples:
        # Using environment variables
        adaptor = GCSStorageAdaptor(bucket='my-bucket')

        # With explicit credentials
        adaptor = GCSStorageAdaptor(
            bucket='my-bucket',
            project='my-project',
            credentials_path='/path/to/credentials.json'
        )
    """

    def __init__(self, **kwargs):
        """Initialize GCS storage adaptor; requires a ``bucket`` kwarg.

        Raises:
            ImportError: If google-cloud-storage is not installed.
            ValueError: If no bucket name was supplied.
        """
        super().__init__(**kwargs)

        if not GCS_AVAILABLE:
            raise ImportError(
                "google-cloud-storage is required for GCS storage. "
                "Install with: pip install google-cloud-storage"
            )

        if 'bucket' not in kwargs:
            raise ValueError("bucket parameter is required for GCS storage")

        self.bucket_name = kwargs['bucket']
        self.project = kwargs.get('project', os.getenv('GOOGLE_CLOUD_PROJECT'))

        # An explicit credentials file is handed to the client library through
        # the standard environment variable it reads.
        if 'credentials_path' in kwargs:
            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = kwargs['credentials_path']

        client_kwargs = {'project': self.project} if self.project else {}
        self.storage_client = storage.Client(**client_kwargs)
        self.bucket = self.storage_client.bucket(self.bucket_name)

    def upload_file(
        self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None
    ) -> str:
        """Upload a local file to GCS and return its gs:// URI."""
        source = Path(local_path)
        if not source.exists():
            raise FileNotFoundError(f"Local file not found: {local_path}")

        try:
            target = self.bucket.blob(remote_path)
            if metadata:
                target.metadata = metadata
            target.upload_from_filename(str(source))
        except Exception as e:
            raise Exception(f"GCS upload failed: {e}")

        return f"gs://{self.bucket_name}/{remote_path}"

    def download_file(self, remote_path: str, local_path: str) -> None:
        """Download a GCS object to a local file, creating parent dirs."""
        target = Path(local_path)
        target.parent.mkdir(parents=True, exist_ok=True)

        try:
            self.bucket.blob(remote_path).download_to_filename(str(target))
        except NotFound:
            raise FileNotFoundError(f"Remote file not found: {remote_path}")
        except Exception as e:
            raise Exception(f"GCS download failed: {e}")

    def delete_file(self, remote_path: str) -> None:
        """Delete an object from the GCS bucket."""
        try:
            self.bucket.blob(remote_path).delete()
        except NotFound:
            raise FileNotFoundError(f"Remote file not found: {remote_path}")
        except Exception as e:
            raise Exception(f"GCS deletion failed: {e}")

    def list_files(
        self, prefix: str = "", max_results: int = 1000
    ) -> List[StorageObject]:
        """List objects in the bucket, optionally filtered by prefix."""
        try:
            listing = self.storage_client.list_blobs(
                self.bucket_name,
                prefix=prefix,
                max_results=max_results
            )
            return [
                StorageObject(
                    key=item.name,
                    size=item.size,
                    last_modified=item.updated.isoformat() if item.updated else None,
                    etag=item.etag,
                    metadata=item.metadata,
                )
                for item in listing
            ]
        except Exception as e:
            raise Exception(f"GCS listing failed: {e}")

    def file_exists(self, remote_path: str) -> bool:
        """Return True if the object exists in the bucket, else False."""
        try:
            return self.bucket.blob(remote_path).exists()
        except Exception as e:
            raise Exception(f"GCS file existence check failed: {e}")

    def get_file_url(self, remote_path: str, expires_in: int = 3600) -> str:
        """Create a V4 signed GET URL valid for ``expires_in`` seconds."""
        try:
            target = self.bucket.blob(remote_path)

            if not target.exists():
                raise FileNotFoundError(f"Remote file not found: {remote_path}")

            return target.generate_signed_url(
                version="v4",
                expiration=timedelta(seconds=expires_in),
                method="GET"
            )
        except FileNotFoundError:
            raise
        except Exception as e:
            raise Exception(f"GCS signed URL generation failed: {e}")

    def copy_file(self, source_path: str, dest_path: str) -> None:
        """Server-side copy of an object within the bucket."""
        try:
            origin = self.bucket.blob(source_path)

            if not origin.exists():
                raise FileNotFoundError(f"Source file not found: {source_path}")

            self.bucket.copy_blob(
                origin,
                self.bucket,
                dest_path
            )
        except FileNotFoundError:
            raise
        except Exception as e:
            raise Exception(f"GCS copy failed: {e}")
|
||||
216
src/skill_seekers/cli/storage/s3_storage.py
Normal file
216
src/skill_seekers/cli/storage/s3_storage.py
Normal file
@@ -0,0 +1,216 @@
|
||||
"""
|
||||
AWS S3 storage adaptor implementation.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
try:
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
BOTO3_AVAILABLE = True
|
||||
except ImportError:
|
||||
BOTO3_AVAILABLE = False
|
||||
|
||||
from .base_storage import BaseStorageAdaptor, StorageObject
|
||||
|
||||
|
||||
class S3StorageAdaptor(BaseStorageAdaptor):
    """
    AWS S3 storage adaptor.

    Configuration:
        bucket: S3 bucket name (required)
        region: AWS region (optional, default: us-east-1)
        aws_access_key_id: AWS access key (optional, uses env/credentials)
        aws_secret_access_key: AWS secret key (optional, uses env/credentials)
        endpoint_url: Custom endpoint URL (optional, for S3-compatible services)

    Environment Variables:
        AWS_ACCESS_KEY_ID: AWS access key
        AWS_SECRET_ACCESS_KEY: AWS secret key
        AWS_DEFAULT_REGION: AWS region

    Examples:
        # Using environment variables
        adaptor = S3StorageAdaptor(bucket='my-bucket')

        # With explicit credentials
        adaptor = S3StorageAdaptor(
            bucket='my-bucket',
            region='us-west-2',
            aws_access_key_id='...',
            aws_secret_access_key='...'
        )

        # S3-compatible service (MinIO, DigitalOcean Spaces)
        adaptor = S3StorageAdaptor(
            bucket='my-bucket',
            endpoint_url='https://nyc3.digitaloceanspaces.com',
            aws_access_key_id='...',
            aws_secret_access_key='...'
        )
    """

    def __init__(self, **kwargs):
        """
        Initialize S3 storage adaptor.

        Args:
            bucket: S3 bucket name (required)
            **kwargs: Additional S3 configuration

        Raises:
            ImportError: If boto3 is not installed.
            ValueError: If no bucket name was supplied.
        """
        super().__init__(**kwargs)

        if not BOTO3_AVAILABLE:
            raise ImportError(
                "boto3 is required for S3 storage. "
                "Install with: pip install boto3"
            )

        if 'bucket' not in kwargs:
            raise ValueError("bucket parameter is required for S3 storage")

        self.bucket = kwargs['bucket']
        self.region = kwargs.get('region', os.getenv('AWS_DEFAULT_REGION', 'us-east-1'))

        # Only forward optional client settings that were explicitly given;
        # otherwise boto3 falls back to its normal credential chain.
        client_kwargs = {
            'region_name': self.region,
        }

        if 'endpoint_url' in kwargs:
            client_kwargs['endpoint_url'] = kwargs['endpoint_url']

        if 'aws_access_key_id' in kwargs:
            client_kwargs['aws_access_key_id'] = kwargs['aws_access_key_id']

        if 'aws_secret_access_key' in kwargs:
            client_kwargs['aws_secret_access_key'] = kwargs['aws_secret_access_key']

        self.s3_client = boto3.client('s3', **client_kwargs)
        self.s3_resource = boto3.resource('s3', **client_kwargs)

    def upload_file(
        self, local_path: str, remote_path: str, metadata: Optional[Dict[str, str]] = None
    ) -> str:
        """Upload a local file to S3 and return its s3:// URI.

        Raises:
            FileNotFoundError: If ``local_path`` does not exist.
            Exception: If the upload fails.
        """
        local_file = Path(local_path)
        if not local_file.exists():
            raise FileNotFoundError(f"Local file not found: {local_path}")

        extra_args = {}
        if metadata:
            extra_args['Metadata'] = metadata

        try:
            self.s3_client.upload_file(
                str(local_file),
                self.bucket,
                remote_path,
                ExtraArgs=extra_args if extra_args else None
            )
            return f"s3://{self.bucket}/{remote_path}"
        except ClientError as e:
            raise Exception(f"S3 upload failed: {e}")

    def download_file(self, remote_path: str, local_path: str) -> None:
        """Download an S3 object to a local file, creating parent dirs.

        Raises:
            FileNotFoundError: If the object does not exist.
            Exception: If the download fails.
        """
        local_file = Path(local_path)
        local_file.parent.mkdir(parents=True, exist_ok=True)

        try:
            self.s3_client.download_file(
                self.bucket,
                remote_path,
                str(local_file)
            )
        except ClientError as e:
            # A missing key surfaces as HTTP 404 (or NoSuchKey) here.
            if e.response['Error']['Code'] in ('404', 'NoSuchKey'):
                raise FileNotFoundError(f"Remote file not found: {remote_path}")
            raise Exception(f"S3 download failed: {e}")

    def delete_file(self, remote_path: str) -> None:
        """Delete an object from the S3 bucket.

        Note: S3 DeleteObject is idempotent, so deleting a missing key
        does not raise FileNotFoundError here.
        """
        try:
            self.s3_client.delete_object(
                Bucket=self.bucket,
                Key=remote_path
            )
        except ClientError as e:
            raise Exception(f"S3 deletion failed: {e}")

    def list_files(
        self, prefix: str = "", max_results: int = 1000
    ) -> List[StorageObject]:
        """List objects in the bucket, up to ``max_results`` entries."""
        try:
            paginator = self.s3_client.get_paginator('list_objects_v2')
            page_iterator = paginator.paginate(
                Bucket=self.bucket,
                Prefix=prefix,
                PaginationConfig={'MaxItems': max_results}
            )

            files = []
            for page in page_iterator:
                # Empty result pages carry no 'Contents' key at all.
                if 'Contents' not in page:
                    continue

                for obj in page['Contents']:
                    files.append(StorageObject(
                        key=obj['Key'],
                        size=obj['Size'],
                        last_modified=obj['LastModified'].isoformat(),
                        etag=obj.get('ETag', '').strip('"')
                    ))

            return files
        except ClientError as e:
            raise Exception(f"S3 listing failed: {e}")

    def file_exists(self, remote_path: str) -> bool:
        """Return True if the object exists in the bucket, else False."""
        try:
            self.s3_client.head_object(
                Bucket=self.bucket,
                Key=remote_path
            )
            return True
        except ClientError as e:
            if e.response['Error']['Code'] == '404':
                return False
            raise Exception(f"S3 head_object failed: {e}")

    def get_file_url(self, remote_path: str, expires_in: int = 3600) -> str:
        """Generate a presigned GET URL for an S3 object.

        Presigning does not contact S3 about the key, so we check existence
        first to honor the base-class contract (and match the GCS/Azure
        adaptors, which raise FileNotFoundError for missing objects).

        Raises:
            FileNotFoundError: If the object does not exist.
            Exception: If URL generation fails.
        """
        if not self.file_exists(remote_path):
            raise FileNotFoundError(f"Remote file not found: {remote_path}")

        try:
            url = self.s3_client.generate_presigned_url(
                'get_object',
                Params={
                    'Bucket': self.bucket,
                    'Key': remote_path
                },
                ExpiresIn=expires_in
            )
            return url
        except ClientError as e:
            raise Exception(f"S3 presigned URL generation failed: {e}")

    def copy_file(self, source_path: str, dest_path: str) -> None:
        """Copy an object within the bucket (server-side copy).

        Raises:
            FileNotFoundError: If the source object does not exist.
            Exception: If the copy fails.
        """
        try:
            copy_source = {
                'Bucket': self.bucket,
                'Key': source_path
            }
            self.s3_client.copy_object(
                CopySource=copy_source,
                Bucket=self.bucket,
                Key=dest_path
            )
        except ClientError as e:
            # copy_object reports a missing source as 'NoSuchKey', not '404';
            # the original '404'-only check never matched, so the
            # FileNotFoundError path was effectively dead code.
            if e.response['Error']['Code'] in ('404', 'NoSuchKey'):
                raise FileNotFoundError(f"Source file not found: {source_path}")
            raise Exception(f"S3 copy failed: {e}")
|
||||
224
src/skill_seekers/cli/sync_cli.py
Normal file
224
src/skill_seekers/cli/sync_cli.py
Normal file
@@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Documentation sync CLI.
|
||||
|
||||
Monitor documentation for changes and automatically update skills.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import signal
|
||||
from pathlib import Path
|
||||
|
||||
from ..sync import SyncMonitor
|
||||
|
||||
|
||||
def handle_signal(signum, frame):
    """Signal handler: announce shutdown and exit with status 0.

    Args:
        signum: Signal number delivered by the OS (unused).
        frame: Current stack frame (unused).
    """
    print("\n🛑 Stopping sync monitor...")
    sys.exit(0)
|
||||
|
||||
|
||||
def start_command(args):
    """Start continuous monitoring until interrupted.

    Builds a SyncMonitor from the CLI args, installs handlers so SIGINT /
    SIGTERM shut the process down cleanly, then idles while the monitor's
    background work runs.

    Args:
        args: Parsed argparse namespace with config, interval, auto_update.
    """
    import time  # hoisted: was previously re-imported on every loop iteration

    monitor = SyncMonitor(
        config_path=args.config,
        check_interval=args.interval,
        auto_update=args.auto_update
    )

    # Register signal handlers
    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    try:
        monitor.start()

        print(f"\n📊 Monitoring {args.config}")
        print(f"   Check interval: {args.interval}s ({args.interval // 60}m)")
        print(f"   Auto-update: {'✅ enabled' if args.auto_update else '❌ disabled'}")
        print("\nPress Ctrl+C to stop\n")

        # Keep the main thread alive; the monitor works in the background.
        while True:
            time.sleep(1)

    except KeyboardInterrupt:
        print("\n🛑 Stopping...")
        monitor.stop()
|
||||
|
||||
|
||||
def check_command(args):
    """Run a single change check and print a summary report.

    Args:
        args: Parsed argparse namespace with config, diff, verbose flags.
    """
    watcher = SyncMonitor(
        config_path=args.config,
        check_interval=3600  # Not used for single check
    )

    print(f"🔍 Checking {args.config} for changes...")

    result = watcher.check_now(generate_diffs=args.diff)

    print(f"\n📊 Results:")
    print(f"   Total pages: {result.total_pages}")
    print(f"   Added: {len(result.added)}")
    print(f"   Modified: {len(result.modified)}")
    print(f"   Deleted: {len(result.deleted)}")
    print(f"   Unchanged: {result.unchanged}")

    # Guard clauses replace the original nested if/else; output is identical.
    if not result.has_changes:
        print("\n✅ No changes detected")
        return

    print(f"\n✨ Detected {result.change_count} changes!")

    if not args.verbose:
        return

    if result.added:
        print("\n✅ Added pages:")
        for change in result.added:
            print(f"   • {change.url}")

    if result.modified:
        print("\n✏️  Modified pages:")
        for change in result.modified:
            print(f"   • {change.url}")
            if change.diff and args.diff:
                print(f"     Diff preview (first 5 lines):")
                for line in change.diff.split('\n')[:5]:
                    print(f"     {line}")

    if result.deleted:
        print("\n❌ Deleted pages:")
        for change in result.deleted:
            print(f"   • {change.url}")
|
||||
|
||||
|
||||
def stats_command(args):
    """Print monitoring statistics for the configured skill.

    Args:
        args: Parsed argparse namespace with the config path.
    """
    watcher = SyncMonitor(
        config_path=args.config,
        check_interval=3600
    )

    info = watcher.stats()

    print(f"\n📊 Statistics for {info['skill_name']}:")
    print(f"   Status: {info['status']}")
    print(f"   Last check: {info['last_check'] or 'Never'}")
    print(f"   Last change: {info['last_change'] or 'Never'}")
    print(f"   Total checks: {info['total_checks']}")
    print(f"   Total changes: {info['total_changes']}")
    print(f"   Tracked pages: {info['tracked_pages']}")
    print(f"   Running: {'✅ Yes' if info['running'] else '❌ No'}")
|
||||
|
||||
|
||||
def reset_command(args):
    """Delete the saved sync-state file for a skill, with confirmation.

    Args:
        args: Parsed argparse namespace with skill_name and force flag.
    """
    state_file = Path(f"{args.skill_name}_sync.json")

    if not state_file.exists():
        print(f"ℹ️  No state file found for {args.skill_name}")
        return

    # --force skips the interactive prompt.
    confirmed = args.force or input(f"⚠️  Reset state for {args.skill_name}? [y/N]: ").lower() == 'y'
    if confirmed:
        state_file.unlink()
        print(f"✅ State reset for {args.skill_name}")
    else:
        print("❌ Reset cancelled")
|
||||
|
||||
|
||||
def main():
    """Main entry point.

    Builds the argparse CLI with four subcommands (start, check, stats,
    reset), dispatches to the matching *_command handler, and converts any
    uncaught exception into a message on stderr plus exit code 1.
    """
    parser = argparse.ArgumentParser(
        description='Monitor documentation for changes and update skills',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Start monitoring (checks every hour)
  skill-seekers-sync start --config configs/react.json

  # Start with custom interval (10 minutes)
  skill-seekers-sync start --config configs/react.json --interval 600

  # Start with auto-update
  skill-seekers-sync start --config configs/react.json --auto-update

  # Check once (no continuous monitoring)
  skill-seekers-sync check --config configs/react.json

  # Check with diffs
  skill-seekers-sync check --config configs/react.json --diff -v

  # Show statistics
  skill-seekers-sync stats --config configs/react.json

  # Reset state
  skill-seekers-sync reset --skill-name react
"""
    )

    subparsers = parser.add_subparsers(dest='command', help='Command to execute')

    # Start command: continuous monitoring loop
    start_parser = subparsers.add_parser('start', help='Start continuous monitoring')
    start_parser.add_argument('--config', required=True, help='Path to skill config file')
    start_parser.add_argument(
        '--interval', '-i',
        type=int,
        default=3600,
        help='Check interval in seconds (default: 3600 = 1 hour)'
    )
    start_parser.add_argument(
        '--auto-update',
        action='store_true',
        help='Automatically rebuild skill on changes'
    )

    # Check command: single one-shot change check
    check_parser = subparsers.add_parser('check', help='Check for changes once')
    check_parser.add_argument('--config', required=True, help='Path to skill config file')
    check_parser.add_argument(
        '--diff', '-d',
        action='store_true',
        help='Generate content diffs'
    )
    check_parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Show detailed output'
    )

    # Stats command: report saved monitoring statistics
    stats_parser = subparsers.add_parser('stats', help='Show monitoring statistics')
    stats_parser.add_argument('--config', required=True, help='Path to skill config file')

    # Reset command: delete the saved sync-state file
    reset_parser = subparsers.add_parser('reset', help='Reset monitoring state')
    reset_parser.add_argument('--skill-name', required=True, help='Skill name')
    reset_parser.add_argument(
        '--force', '-f',
        action='store_true',
        help='Skip confirmation'
    )

    args = parser.parse_args()

    # No subcommand given: show usage and exit non-zero.
    if not args.command:
        parser.print_help()
        sys.exit(1)

    try:
        if args.command == 'start':
            start_command(args)
        elif args.command == 'check':
            check_command(args)
        elif args.command == 'stats':
            stats_command(args)
        elif args.command == 'reset':
            reset_command(args)
    except Exception as e:
        # Top-level boundary: surface the error and fail the process.
        print(f"\n❌ Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
Reference in New Issue
Block a user