feat(v2.7.0): Smart Rate Limit Management & Multi-Token Configuration

Major Features:
- Multi-profile GitHub token system with secure storage
- Smart rate limit handler with 4 strategies (prompt/wait/switch/fail)
- Interactive configuration wizard with browser integration
- Configurable timeout (default 30 min) per profile
- Automatic profile switching on rate limits
- Live countdown timers with real-time progress
- Non-interactive mode for CI/CD (--non-interactive flag)
- Progress tracking and resume capability (skeleton)
- Comprehensive test suite (16 tests, all passing)

Solves:
- Indefinite waiting on GitHub rate limits
- Confusing GitHub token setup

Files Added:
- src/skill_seekers/cli/config_manager.py (~490 lines)
- src/skill_seekers/cli/config_command.py (~400 lines)
- src/skill_seekers/cli/rate_limit_handler.py (~450 lines)
- src/skill_seekers/cli/resume_command.py (~150 lines)
- tests/test_rate_limit_handler.py (16 tests)

Files Modified:
- src/skill_seekers/cli/github_fetcher.py (rate limit integration)
- src/skill_seekers/cli/github_scraper.py (--non-interactive, --profile flags)
- src/skill_seekers/cli/main.py (config, resume subcommands)
- pyproject.toml (version 2.7.0)
- CHANGELOG.md, README.md, CLAUDE.md (documentation)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-17 18:38:31 +03:00
parent 52ca93f22b
commit c89f059712
15 changed files with 2891 additions and 33 deletions

View File

@@ -0,0 +1,174 @@
"""
Resume Command for Skill Seekers
Allows users to resume interrupted scraping jobs from saved progress.
"""
import sys
import argparse
from typing import Optional
from .config_manager import get_config_manager
def list_resumable_jobs():
    """Print every resumable job with its start time, command, and progress.

    When no saved jobs exist, explains the situations in which progress
    files are written so the user knows why the list is empty.
    """
    manager = get_config_manager()
    resumable = manager.list_resumable_jobs()

    if not resumable:
        print("\n📦 No resumable jobs found.\n")
        print("Jobs are automatically saved when:")
        print(" • You interrupt a scraping operation (Ctrl+C)")
        print(" • A rate limit is reached")
        print(" • An error occurs during scraping\n")
        return

    print(f"\n📦 Resumable Jobs ({len(resumable)} available):\n")
    for position, entry in enumerate(resumable, 1):
        print(f"{position}. Job ID: {entry['job_id']}")
        print(f" Started: {entry.get('started_at', 'Unknown')}")
        print(f" Command: {entry.get('command', 'Unknown')}")

        details = entry.get("progress", {})
        if details:
            print(f" Progress: {details.get('phase', 'Unknown')}")
            done = details.get("files_processed", 0)
            total = details.get("files_total", 0)
            # Guard against division by zero when the total is unknown.
            if total > 0:
                pct = (done / total) * 100
                print(f" Files: {done}/{total} ({pct:.1f}%)")

        print(f" Last updated: {entry.get('last_updated', 'Unknown')}")
        print()

    print("To resume a job:")
    print(" $ skill-seekers resume <job_id>\n")
def _print_resume_stub(activity: str, feature: str, command: str) -> int:
    """Report that resuming *activity* is not yet implemented.

    Args:
        activity: Lower-case phrase used in the "Resuming ..." line.
        feature: Capitalized feature name used in the warning line.
        command: The original CLI command the user can re-run.

    Returns:
        1, the non-success exit code, since nothing was actually resumed.
    """
    print(f"📌 Resuming {activity}...")
    print(f"⚠️ Note: {feature} resume feature not yet implemented")
    print(" You can re-run the original command - it will use cached data where available.\n")
    print(f" Command: {command}\n")
    return 1


def resume_job(job_id: str):
    """Resume a specific job identified by *job_id*.

    Loads the saved progress record, validates that it exists and is
    marked resumable, then dispatches on the original command string.
    All job types currently print a not-yet-implemented notice.

    Args:
        job_id: Identifier of a previously saved job.

    Returns:
        Exit code: 1 on every path for now (lookup failure, not
        resumable, unknown job type, or feature stub).
    """
    config = get_config_manager()
    print(f"\n🔄 Resuming job: {job_id}\n")

    # Load progress; a falsy result means the job was never saved or
    # its progress file is unreadable.
    progress = config.load_progress(job_id)
    if not progress:
        print(f"❌ Job '{job_id}' not found or cannot be resumed.\n")
        print("Use 'skill-seekers resume --list' to see available jobs.\n")
        return 1
    if not progress.get("can_resume", False):
        print(f"❌ Job '{job_id}' is not marked as resumable.\n")
        return 1

    # Extract job details (the saved "config" payload is not yet used).
    command = progress.get("command", "")
    checkpoint = progress.get("progress", {}).get("last_checkpoint")
    print(f"Original command: {command}")
    print(f"Last checkpoint: {checkpoint or 'Unknown'}")
    print()

    # Substring dispatch on the original command; first match wins,
    # so the github -> scrape -> unified order is preserved.
    if "github" in command:
        return _print_resume_stub("GitHub scraping", "GitHub", command)
    elif "scrape" in command:
        return _print_resume_stub(
            "documentation scraping", "Documentation scraping", command
        )
    elif "unified" in command:
        return _print_resume_stub("unified scraping", "Unified scraping", command)
    else:
        print("❌ Unknown job type. Cannot resume.\n")
        return 1
def clean_old_jobs():
    """Delete stale progress files and report how many jobs were removed.

    The count of deleted jobs is derived by comparing the number of
    resumable jobs before and after the cleanup pass.
    """
    manager = get_config_manager()
    print("\n🧹 Cleaning up old progress files...\n")

    before = len(manager.list_resumable_jobs())
    manager.cleanup_old_progress()
    after = len(manager.list_resumable_jobs())

    removed = before - after
    if removed > 0:
        print(f"✅ Deleted {removed} old job(s)")
    else:
        print("✅ No old jobs to clean up")

    # Trailing newline either way; mention survivors only if any remain.
    if after > 0:
        print(f"📦 {after} job(s) remaining\n")
    else:
        print()
def main():
    """Entry point for the resume subcommand.

    Parses CLI arguments and routes to listing, cleanup, or resuming a
    single job. Returns a process exit code (0 on success, 1 on error).
    """
    parser = argparse.ArgumentParser(
        description="Resume interrupted Skill Seekers jobs"
    )
    parser.add_argument("job_id", nargs="?", help="Job ID to resume")
    parser.add_argument(
        "--list", action="store_true", help="List all resumable jobs"
    )
    parser.add_argument(
        "--clean", action="store_true", help="Clean up old progress files"
    )
    args = parser.parse_args()

    # Informational modes short-circuit before any job_id handling.
    if args.list:
        list_resumable_jobs()
        return 0
    if args.clean:
        clean_old_jobs()
        return 0

    if args.job_id:
        return resume_job(args.job_id)

    # No job_id and no mode flag: show usage and fail.
    print("\n❌ Error: Job ID required or use --list to see available jobs\n")
    parser.print_help()
    return 1
# Allow direct execution of this module; propagate main()'s exit code.
if __name__ == "__main__":
    sys.exit(main())