feat(v2.7.0): Smart Rate Limit Management & Multi-Token Configuration

Major Features:
- Multi-profile GitHub token system with secure storage
- Smart rate limit handler with 4 strategies (prompt/wait/switch/fail)
- Interactive configuration wizard with browser integration
- Configurable timeout (default 30 min) per profile
- Automatic profile switching on rate limits
- Live countdown timers with real-time progress
- Non-interactive mode for CI/CD (--non-interactive flag)
- Progress tracking and resume capability (skeleton)
- Comprehensive test suite (16 tests, all passing)

Solves:
- Indefinite waiting on GitHub rate limits
- Confusing GitHub token setup

Files Added:
- src/skill_seekers/cli/config_manager.py (~490 lines)
- src/skill_seekers/cli/config_command.py (~400 lines)
- src/skill_seekers/cli/rate_limit_handler.py (~450 lines)
- src/skill_seekers/cli/resume_command.py (~150 lines)
- tests/test_rate_limit_handler.py (16 tests)

Files Modified:
- src/skill_seekers/cli/github_fetcher.py (rate limit integration)
- src/skill_seekers/cli/github_scraper.py (--non-interactive, --profile flags)
- src/skill_seekers/cli/main.py (config, resume subcommands)
- pyproject.toml (version 2.7.0)
- CHANGELOG.md, README.md, CLAUDE.md (documentation)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-17 18:38:31 +03:00
parent 52ca93f22b
commit c89f059712
15 changed files with 2891 additions and 33 deletions

View File

@@ -0,0 +1,580 @@
"""
Interactive Configuration Wizard for Skill Seekers
Provides user-friendly setup for GitHub tokens, API keys, and settings.
"""
import sys
import webbrowser
from typing import Optional
from .config_manager import get_config_manager
def show_welcome_message():
    """Print the first-run banner and offer to start the configuration wizard.

    A "y"/"yes" answer opens the main menu; any other answer prints a
    reminder of the command to run later. In both cases the welcome is then
    recorded as shown through the config manager.
    """
    banner = """
╔═══════════════════════════════════════════════════════════════╗
║ ║
║ Welcome to Skill Seekers! 🎯 ║
║ ║
║ Convert documentation into LLM skills for Claude, Gemini, ║
║ OpenAI ChatGPT, and more! ║
║ ║
╚═══════════════════════════════════════════════════════════════╝
Quick Start:
1⃣ Set up GitHub token (optional, but recommended):
$ skill-seekers config --github
2⃣ Scrape documentation:
$ skill-seekers scrape --config configs/react.json
3⃣ View available presets:
$ skill-seekers estimate --all
For more help:
$ skill-seekers --help
$ skill-seekers config --help
Documentation: https://github.com/SkillSeekers/skill-seekers
"""
    print(banner)

    manager = get_config_manager()

    # Offer to jump straight into setup.
    answer = input("Would you like to run the configuration wizard now? [y/N]: ")
    wants_wizard = answer.strip().lower() in ("y", "yes")
    if not wants_wizard:
        print("\nYou can run the configuration wizard anytime with:")
        print(" $ skill-seekers config\n")
    else:
        main_menu()

    manager.mark_welcome_shown()
def main_menu():
    """Run the top-level configuration menu until the user picks "0".

    Each pass prints the menu, reads one choice and dispatches it to the
    matching sub-menu or config-manager action. Unknown input prints an
    error and shows the menu again.
    """
    manager = get_config_manager()

    def _show_summary():
        manager.display_config_summary()
        input("\nPress Enter to continue...")

    def _clean_progress():
        manager.cleanup_old_progress()
        input("\nPress Enter to continue...")

    # Menu key -> action; "0" (exit) is handled separately below.
    actions = {
        "1": github_token_menu,
        "2": api_keys_menu,
        "3": rate_limit_settings,
        "4": resume_settings,
        "5": _show_summary,
        "6": test_connections,
        "7": _clean_progress,
    }
    screen = (
        "\n╔═══════════════════════════════════════════════════╗",
        "║ Skill Seekers Configuration ║",
        "╚═══════════════════════════════════════════════════╝\n",
        " 1. GitHub Token Setup",
        " 2. API Keys (Claude, Gemini, OpenAI)",
        " 3. Rate Limit Settings",
        " 4. Resume Settings",
        " 5. View Current Configuration",
        " 6. Test Connections",
        " 7. Clean Up Old Progress Files",
        " 0. Exit\n",
    )

    while True:
        for line in screen:
            print(line)

        selection = input("Select an option [0-7]: ").strip()
        if selection == "0":
            print("\n✅ Configuration saved. Happy scraping! 🚀\n")
            break

        action = actions.get(selection)
        if action is None:
            print("❌ Invalid choice. Please try again.")
            continue
        action()
def github_token_menu():
    """Run the GitHub token sub-menu until the user picks "0".

    Each pass lists the configured profiles (re-read from the config manager
    so changes made by a sub-action are visible), prints the options and
    dispatches the selected one.
    """
    manager = get_config_manager()

    # Menu key -> action; "0" (back) is handled separately below.
    actions = {
        "1": add_github_profile,
        "2": remove_github_profile,
        "3": set_default_profile,
        "4": open_github_token_page,
    }

    while True:
        print("\n╔═══════════════════════════════════════════════════╗")
        print("║ GitHub Token Management ║")
        print("╚═══════════════════════════════════════════════════╝\n")

        entries = manager.list_github_profiles()
        if not entries:
            print("No GitHub profiles configured.\n")
        else:
            print("Current Profiles:\n")
            for entry in entries:
                marker = " ⭐ (default)" if entry["is_default"] else ""
                print(f"{entry['name']}{marker}")
                if entry["description"]:
                    print(f" {entry['description']}")
                print(f" Strategy: {entry['strategy']}, Timeout: {entry['timeout']}m\n")

        for line in (
            "Options:",
            " 1. Add New Profile",
            " 2. Remove Profile",
            " 3. Set Default Profile",
            " 4. Open GitHub Token Page",
            " 0. Back to Main Menu\n",
        ):
            print(line)

        selection = input("Select an option [0-4]: ").strip()
        if selection == "0":
            break

        action = actions.get(selection)
        if action is None:
            print("❌ Invalid choice. Please try again.")
        else:
            action()
def add_github_profile():
    """Interactively collect a GitHub profile and store it in the config.

    Prompts, in order, for:
      * profile name - must be non-empty; an existing name requires an
        explicit overwrite confirmation;
      * optional description;
      * token - must be non-empty; a token without a ``ghp_`` or
        ``github_pat_`` prefix requires confirmation;
      * rate-limit strategy (prompt / wait / switch / fail);
      * timeout in minutes - must be a positive integer;
      * whether the profile becomes the default (the first profile is always
        made the default).

    Invalid strategy or timeout input falls back to the defaults ("prompt",
    30 minutes) with a warning. The profile is persisted through
    ``config.add_github_profile``.
    """
    config = get_config_manager()
    print("\n📝 Add New GitHub Profile\n")

    # Profile name
    while True:
        name = input("Profile name (e.g., 'personal', 'work'): ").strip()
        if not name:
            print("❌ Profile name cannot be empty.")
            continue
        if name in config.config["github"]["profiles"]:
            print(f"❌ Profile '{name}' already exists.")
            overwrite = input("Overwrite? [y/N]: ").strip().lower()
            if overwrite not in ("y", "yes"):
                continue
        break

    # Description
    description = input("Description (optional): ").strip()

    # Token
    print("\nTo create a GitHub token:")
    print(" 1. Go to: https://github.com/settings/tokens")
    print(" 2. Click 'Generate new token' → 'Generate new token (classic)'")
    print(" 3. Scopes needed:")
    print(" • For public repos: 'public_repo'")
    print(" • For private repos: 'repo' (full access)")
    print(" 4. Copy the token (ghp_...)\n")
    open_now = input("Open GitHub token page in browser? [Y/n]: ").strip().lower()
    if open_now not in ("n", "no"):
        open_github_token_page()

    while True:
        token = input("\nGitHub token (ghp_...): ").strip()
        if not token:
            print("❌ Token cannot be empty.")
            continue
        if not token.startswith(("ghp_", "github_pat_")):
            print("⚠️ Warning: Token doesn't match GitHub format")
            proceed = input("Continue anyway? [y/N]: ").strip().lower()
            if proceed not in ("y", "yes"):
                continue
        break

    # Rate limit strategy
    print("\nRate Limit Strategy:")
    print(" 1. prompt - Ask what to do (default)")
    print(" 2. wait - Wait until reset")
    print(" 3. switch - Try another profile")
    print(" 4. fail - Fail immediately")
    strategy_choice = input("\nSelect strategy [1-4] (default: 1): ").strip() or "1"
    strategy_map = {
        "1": "prompt",
        "2": "wait",
        "3": "switch",
        "4": "fail",
    }
    strategy = strategy_map.get(strategy_choice)
    if strategy is None:
        # Tell the user their selection was not understood instead of
        # silently storing a strategy they did not pick.
        print("⚠️ Invalid strategy, using default 'prompt'")
        strategy = "prompt"

    # Timeout
    timeout_input = input("\nTimeout in minutes (default: 30): ").strip() or "30"
    try:
        timeout = int(timeout_input)
    except ValueError:
        timeout = 0  # routed to the shared "invalid" branch below
    if timeout <= 0:
        # A zero or negative timeout is never meaningful for waiting on a
        # rate-limit reset, so treat it like unparsable input.
        print("⚠️ Invalid timeout, using default 30 minutes")
        timeout = 30

    # Set as default
    has_profiles = bool(config.config["github"]["profiles"])
    if has_profiles:
        set_default = input("\nSet as default profile? [y/N]: ").strip().lower() in ("y", "yes")
    else:
        set_default = True  # First profile is always default

    # Add profile
    config.add_github_profile(
        name=name,
        token=token,
        description=description,
        rate_limit_strategy=strategy,
        timeout_minutes=timeout,
        set_as_default=set_default,
    )
    print(f"\n✅ GitHub profile '{name}' added successfully!")
def remove_github_profile():
    """Let the user pick a GitHub profile by number and delete it.

    Does nothing when no profiles exist or the user enters 0. The deletion
    itself requires a "y"/"yes" confirmation. Non-numeric input, or a
    ValueError raised while removing, is reported as invalid input.
    """
    manager = get_config_manager()
    entries = manager.list_github_profiles()
    if not entries:
        print("\n❌ No profiles to remove.")
        return

    print("\n🗑️ Remove GitHub Profile\n")
    print("Available profiles:")
    for number, entry in enumerate(entries, start=1):
        tag = " (default)" if entry["is_default"] else ""
        print(f" {number}. {entry['name']}{tag}")

    raw = input(f"\nSelect profile to remove [1-{len(entries)}] or 0 to cancel: ").strip()
    try:
        picked = int(raw)
        if picked == 0:
            return
        if picked < 1 or picked > len(entries):
            print("❌ Invalid choice.")
            return
        target = entries[picked - 1]["name"]
        answer = input(f"Really remove profile '{target}'? [y/N]: ").strip().lower()
        if answer in ("y", "yes"):
            manager.remove_github_profile(target)
    except ValueError:
        print("❌ Invalid input.")
def set_default_profile():
    """Let the user pick which GitHub profile is the default and save it.

    Does nothing when no profiles exist or the user enters 0. An
    out-of-range number or non-numeric input is reported and leaves the
    configuration untouched.
    """
    manager = get_config_manager()
    entries = manager.list_github_profiles()
    if not entries:
        print("\n❌ No profiles available.")
        return

    print("\n⭐ Set Default GitHub Profile\n")
    print("Available profiles:")
    for number, entry in enumerate(entries, start=1):
        tag = " (current default)" if entry["is_default"] else ""
        print(f" {number}. {entry['name']}{tag}")

    raw = input(f"\nSelect default profile [1-{len(entries)}] or 0 to cancel: ").strip()
    try:
        picked = int(raw)
        if picked == 0:
            return
        if picked < 1 or picked > len(entries):
            print("❌ Invalid choice.")
            return
        chosen = entries[picked - 1]["name"]
        manager.config["github"]["default_profile"] = chosen
        manager.save_config()
        print(f"\n✅ Set '{chosen}' as default profile")
    except ValueError:
        print("❌ Invalid input.")
def open_github_token_page():
    """Open the GitHub token creation page in the user's browser.

    ``webbrowser.open`` signals failure by returning False rather than by
    raising, so the return value decides whether success is reported. On any
    failure the URL is printed so the user can open it manually.
    """
    url = "https://github.com/settings/tokens/new"
    print(f"\n🌐 Opening {url}...")
    try:
        opened = webbrowser.open(url)
    except Exception as e:
        # Best effort: a broken browser setup must not abort the wizard.
        print(f"⚠️ Could not open browser: {e}")
        print(f" Please visit: {url}")
        return

    if opened:
        print("✅ Opened in browser")
    else:
        print("⚠️ Could not open browser")
        print(f" Please visit: {url}")
def api_keys_menu():
    """Show which provider API keys are set and let the user set one.

    For each provider the status line says whether a key is available and
    whether it comes from the environment or from the config file. The
    menu is shown once; choosing 1-3 starts the key prompt for that provider.
    """
    import os

    manager = get_config_manager()
    print("\n╔═══════════════════════════════════════════════════╗")
    print("║ API Keys Management ║")
    print("╚═══════════════════════════════════════════════════╝\n")

    env_names = {
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "openai": "OPENAI_API_KEY",
    }
    print("Current status:")
    for provider in ("anthropic", "google", "openai"):
        if not manager.get_api_key(provider):
            print(f"{provider.capitalize()}: ❌ Not set")
            continue
        # A key exists; the environment variable wins over the config file.
        if os.getenv(env_names[provider]):
            origin = " (from environment)"
        else:
            origin = " (from config)"
        print(f"{provider.capitalize()}: ✅ Set{origin}")

    for line in (
        "\nOptions:",
        " 1. Set Anthropic (Claude) API Key",
        " 2. Set Google (Gemini) API Key",
        " 3. Set OpenAI (ChatGPT) API Key",
        " 0. Back to Main Menu\n",
    ):
        print(line)

    selection = input("Select an option [0-3]: ").strip()
    targets = {
        "1": ("anthropic", "https://console.anthropic.com/settings/keys"),
        "2": ("google", "https://makersuite.google.com/app/apikey"),
        "3": ("openai", "https://platform.openai.com/api-keys"),
    }
    target = targets.get(selection)
    if target is not None:
        set_api_key(*target)
    elif selection != "0":
        print("❌ Invalid choice.")
def set_api_key(provider: str, url: str):
    """Prompt for a provider's API key and store it in the config.

    Args:
        provider: Provider identifier passed on to ``config.set_api_key``.
        url: Page where the user can create or look up the key; optionally
            opened in the browser before prompting.

    An empty answer leaves the stored key unchanged.
    """
    config = get_config_manager()
    print(f"\n🔑 Set {provider.capitalize()} API Key\n")
    print(f"Get your API key at: {url}\n")

    open_now = input("Open in browser? [Y/n]: ").strip().lower()
    if open_now not in ("n", "no"):
        # Opening the browser is a convenience only: report a failure but
        # keep going. Catch Exception (not a bare except) so Ctrl-C still
        # interrupts the wizard.
        try:
            opened = webbrowser.open(url)
        except Exception as e:
            opened = False
            print(f"⚠️ Could not open browser: {e}")
        if opened:
            print("✅ Opened in browser\n")
        else:
            print(f" Please visit: {url}\n")

    key = input(f"Enter {provider.capitalize()} API key (or leave empty to skip): ").strip()
    if key:
        config.set_api_key(provider, key)
    else:
        print("⏭️ Skipped")
def rate_limit_settings():
    """Interactively edit the global rate-limit settings and save them.

    For each setting an empty answer keeps the current value. The timeout
    must be a positive integer, and the two toggles accept only y/yes or
    n/no. Anything else prints a warning and keeps the current value instead
    of being stored as a bogus setting.
    """
    config = get_config_manager()
    print("\n╔═══════════════════════════════════════════════════╗")
    print("║ Rate Limit Settings ║")
    print("╚═══════════════════════════════════════════════════╝\n")

    # Same dict object as config.config["rate_limit"], so writes below
    # modify the live configuration.
    current = config.config["rate_limit"]
    print("Current settings:")
    print(f" • Default timeout: {current['default_timeout_minutes']} minutes")
    print(f" • Auto-switch profiles: {current['auto_switch_profiles']}")
    print(f" • Show countdown: {current['show_countdown']}\n")

    def _ask_toggle(prompt: str, key: str) -> None:
        """Update boolean setting *key* from a y/n answer to *prompt*."""
        answer = input(prompt).strip().lower()
        if not answer:
            return
        if answer in ("y", "yes"):
            current[key] = True
        elif answer in ("n", "no"):
            current[key] = False
        else:
            print("⚠️ Invalid input, keeping current value")

    # Timeout
    timeout_input = input(f"Default timeout in minutes [{current['default_timeout_minutes']}]: ").strip()
    if timeout_input:
        try:
            minutes = int(timeout_input)
        except ValueError:
            minutes = 0  # routed to the shared "invalid" branch below
        if minutes > 0:
            current["default_timeout_minutes"] = minutes
        else:
            print("⚠️ Invalid input, keeping current value")

    # Auto-switch
    _ask_toggle(
        f"Auto-switch to other profiles? [y/n] ({current['auto_switch_profiles']}): ",
        "auto_switch_profiles",
    )

    # Show countdown
    _ask_toggle(
        f"Show countdown timer? [y/n] ({current['show_countdown']}): ",
        "show_countdown",
    )

    config.save_config()
    print("\n✅ Rate limit settings updated")
def resume_settings():
    """Interactively edit the resume/progress settings and save them.

    An empty answer keeps the current value. The auto-save interval must be
    at least 1 second and the retention period at least 0 days. A negative
    retention would put the cleanup cutoff in the future and cause every
    progress file to be deleted, so it is rejected like unparsable input.
    """
    config = get_config_manager()
    print("\n╔═══════════════════════════════════════════════════╗")
    print("║ Resume Settings ║")
    print("╚═══════════════════════════════════════════════════╝\n")

    # Same dict object as config.config["resume"], so writes below modify
    # the live configuration.
    current = config.config["resume"]
    print("Current settings:")
    print(f" • Auto-save interval: {current['auto_save_interval_seconds']} seconds")
    print(f" • Keep progress for: {current['keep_progress_days']} days\n")

    def _ask_int(prompt: str, key: str, minimum: int) -> None:
        """Update integer setting *key* if the answer is >= *minimum*."""
        answer = input(prompt).strip()
        if not answer:
            return
        try:
            value = int(answer)
        except ValueError:
            value = None
        if value is None or value < minimum:
            print("⚠️ Invalid input, keeping current value")
        else:
            current[key] = value

    # Auto-save interval
    _ask_int(
        f"Auto-save interval in seconds [{current['auto_save_interval_seconds']}]: ",
        "auto_save_interval_seconds",
        1,
    )

    # Keep days
    _ask_int(
        f"Keep progress for how many days [{current['keep_progress_days']}]: ",
        "keep_progress_days",
        0,
    )

    config.save_config()
    print("\n✅ Resume settings updated")
def test_connections():
    """Check every stored GitHub token and report API-key availability.

    Each GitHub profile's token is sent to the GitHub ``/rate_limit``
    endpoint and the remaining quota (or the failure) is printed. Provider
    API keys are only checked for presence; no request is made for them.
    Waits for Enter before returning.
    """
    manager = get_config_manager()
    print("\n╔═══════════════════════════════════════════════════╗")
    print("║ Connection Tests ║")
    print("╚═══════════════════════════════════════════════════╝\n")

    # GitHub tokens
    print("Testing GitHub tokens...")
    entries = manager.list_github_profiles()
    if entries:
        import requests

        stored = manager.config["github"]["profiles"]
        for entry in entries:
            label = entry["name"]
            secret = stored[label]["token"]
            try:
                reply = requests.get(
                    "https://api.github.com/rate_limit",
                    headers={"Authorization": f"token {secret}"},
                    timeout=5,
                )
                if reply.status_code != 200:
                    print(f"{label}: Invalid token (status {reply.status_code})")
                else:
                    quota = reply.json()["rate"]
                    left = quota["remaining"]
                    total = quota["limit"]
                    print(f"{label}: {left}/{total} requests remaining")
            except Exception as e:
                print(f"{label}: Connection failed - {e}")
    else:
        print(" ⚠️ No GitHub profiles configured")
    print()

    # API keys: presence only, a live call would cost the user credits/quota.
    print("Testing API keys...")
    providers = (
        ("anthropic", "Anthropic", "credits"),
        ("google", "Google", "quota"),
        ("openai", "OpenAI", "credits"),
    )
    for provider, label, resource in providers:
        if manager.get_api_key(provider):
            print(f" {label}: Key configured (test would consume {resource})")
        else:
            print(f" ⚠️ {label}: Not configured")

    input("\nPress Enter to continue...")
def main():
    """Entry point of the ``config`` command.

    Parses the command-line switches and runs the first requested shortcut
    (welcome, show, test, github, api-keys - checked in that order). With no
    switch given, the interactive main menu is opened.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Configure Skill Seekers settings")
    switches = (
        ("--github", "Go directly to GitHub token setup"),
        ("--api-keys", "Go directly to API keys setup"),
        ("--show", "Show current configuration and exit"),
        ("--test", "Test connections and exit"),
        ("--welcome", "Show welcome message"),
    )
    for flag, text in switches:
        parser.add_argument(flag, action="store_true", help=text)
    options = parser.parse_args()

    manager = get_config_manager()

    # Direct options, in priority order; only the first one set is run.
    shortcuts = (
        (options.welcome, show_welcome_message),
        (options.show, manager.display_config_summary),
        (options.test, test_connections),
        (options.github, github_token_menu),
        (options.api_keys, api_keys_menu),
    )
    for requested, action in shortcuts:
        if requested:
            action()
            return

    # No shortcut requested: interactive menu.
    main_menu()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,464 @@
"""
Configuration Manager for Skill Seekers
Handles multi-profile GitHub tokens, API keys, and application settings.
Provides secure storage with file permissions and auto-detection capabilities.
"""
import copy
import json
import os
import stat
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any
class ConfigManager:
    """Manage Skill Seekers configuration with multi-token support.

    The configuration (GitHub profiles, API keys, rate-limit and resume
    settings) is kept in ``self.config`` and persisted as JSON in
    ``CONFIG_FILE``. Per-job progress files are stored under
    ``PROGRESS_DIR``. Directories are restricted to the owner (0o700) and
    files written by this class to 0o600.
    """

    # Default paths
    CONFIG_DIR = Path.home() / ".config" / "skill-seekers"
    CONFIG_FILE = CONFIG_DIR / "config.json"
    WELCOME_FLAG = CONFIG_DIR / ".welcomed"
    PROGRESS_DIR = Path.home() / ".local" / "share" / "skill-seekers" / "progress"

    # Default configuration. Never hand this object (or its nested dicts) out
    # directly: always deep-copy, otherwise edits leak into the defaults.
    DEFAULT_CONFIG = {
        "version": "1.0",
        "github": {
            "default_profile": None,
            "profiles": {}
        },
        "rate_limit": {
            "default_timeout_minutes": 30,
            "auto_switch_profiles": True,
            "show_countdown": True
        },
        "resume": {
            "auto_save_interval_seconds": 60,
            "keep_progress_days": 7
        },
        "api_keys": {
            "anthropic": None,
            "google": None,
            "openai": None
        },
        "first_run": {
            "completed": False,
            "version": "2.7.0"
        }
    }

    # Environment variables that take precedence over stored API keys.
    _API_KEY_ENV_VARS = {
        "anthropic": "ANTHROPIC_API_KEY",
        "google": "GOOGLE_API_KEY",
        "openai": "OPENAI_API_KEY",
    }

    def __init__(self):
        """Create the storage directories and load the configuration."""
        self.config_dir = self.CONFIG_DIR
        self.config_file = self.CONFIG_FILE
        self.progress_dir = self.PROGRESS_DIR
        self._ensure_directories()
        self.config = self._load_config()

    def _ensure_directories(self):
        """Ensure configuration and progress directories exist with secure permissions."""
        for directory in [self.config_dir, self.progress_dir]:
            directory.mkdir(parents=True, exist_ok=True)
            # Set directory permissions to 700 (rwx------)
            directory.chmod(stat.S_IRWXU)

    def _load_config(self) -> Dict[str, Any]:
        """Load the configuration file, falling back to defaults.

        Returns:
            The stored configuration merged over the defaults, or an
            independent copy of the defaults when the file is missing,
            unreadable, not valid JSON, or not a JSON object.
        """
        if not self.config_file.exists():
            return copy.deepcopy(self.DEFAULT_CONFIG)
        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            if not isinstance(loaded, dict):
                raise ValueError("top-level JSON value must be an object")
            # Merge with defaults for any missing keys
            return self._merge_with_defaults(loaded)
        except (ValueError, OSError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f"⚠️ Warning: Could not load config file: {e}")
            print(" Using default configuration.")
            return copy.deepcopy(self.DEFAULT_CONFIG)

    def _merge_with_defaults(self, config: Dict[str, Any]) -> Dict[str, Any]:
        """Return *config* layered over a private copy of the defaults.

        Nested dicts are merged key by key; any other value in *config*
        replaces the default. The result shares no dict with
        ``DEFAULT_CONFIG``, so mutating it cannot alter the defaults.
        """
        def merge_into(target: dict, custom: dict) -> dict:
            for key, value in custom.items():
                if isinstance(target.get(key), dict) and isinstance(value, dict):
                    merge_into(target[key], value)
                else:
                    target[key] = value
            return target

        return merge_into(copy.deepcopy(self.DEFAULT_CONFIG), config)

    def save_config(self):
        """Save configuration to file with secure permissions.

        Exits the process with status 1 if the file cannot be written.
        """
        try:
            with open(self.config_file, 'w', encoding='utf-8') as f:
                json.dump(self.config, f, indent=2)
            # Set file permissions to 600 (rw-------)
            self.config_file.chmod(stat.S_IRUSR | stat.S_IWUSR)
        except OSError as e:
            print(f"❌ Error saving config: {e}")
            sys.exit(1)

    # GitHub Token Management

    def add_github_profile(
        self,
        name: str,
        token: str,
        description: str = "",
        rate_limit_strategy: str = "prompt",
        timeout_minutes: int = 30,
        set_as_default: bool = False
    ):
        """Add (or overwrite) a GitHub profile and save the configuration.

        The profile becomes the default when *set_as_default* is true or no
        default is configured yet. A token without a ``ghp_`` or
        ``github_pat_`` prefix only triggers a warning.

        Raises:
            ValueError: If *name* is empty.
        """
        if not name:
            raise ValueError("Profile name cannot be empty")
        if not token.startswith(("ghp_", "github_pat_")):
            print("⚠️ Warning: Token doesn't match GitHub format (ghp_* or github_pat_*)")
        profile = {
            "token": token,
            "description": description,
            "rate_limit_strategy": rate_limit_strategy,
            "timeout_minutes": timeout_minutes,
            "added_at": datetime.now().isoformat()
        }
        self.config["github"]["profiles"][name] = profile
        if set_as_default or not self.config["github"]["default_profile"]:
            self.config["github"]["default_profile"] = name
        self.save_config()
        print(f"✅ Added GitHub profile: {name}")
        if set_as_default:
            print("✅ Set as default profile")

    def remove_github_profile(self, name: str):
        """Remove a GitHub profile and save the configuration.

        If the removed profile was the default, the first remaining profile
        (or None) becomes the default.

        Raises:
            ValueError: If no profile called *name* exists.
        """
        if name not in self.config["github"]["profiles"]:
            raise ValueError(f"Profile '{name}' not found")
        del self.config["github"]["profiles"][name]
        # Update default if we removed it
        if self.config["github"]["default_profile"] == name:
            remaining = list(self.config["github"]["profiles"])
            self.config["github"]["default_profile"] = remaining[0] if remaining else None
        self.save_config()
        print(f"✅ Removed GitHub profile: {name}")

    def list_github_profiles(self) -> List[Dict[str, Any]]:
        """Return a summary dict per GitHub profile (tokens are not included)."""
        default = self.config["github"]["default_profile"]
        return [
            {
                "name": name,
                "description": data.get("description", ""),
                "strategy": data.get("rate_limit_strategy", "prompt"),
                "timeout": data.get("timeout_minutes", 30),
                "is_default": name == default,
                "added_at": data.get("added_at", "Unknown"),
            }
            for name, data in self.config["github"]["profiles"].items()
        ]

    def get_github_token(
        self,
        profile_name: Optional[str] = None,
        repo_url: Optional[str] = None
    ) -> Optional[str]:
        """
        Get GitHub token with smart fallback chain.

        Priority:
        1. Specified profile_name
        2. Environment variable GITHUB_TOKEN
        3. Default profile from config
        4. None (will use 60/hour unauthenticated)

        An unknown *profile_name* prints a warning and falls through to the
        next source. *repo_url* is accepted but not used by this method.
        """
        profiles = self.config["github"]["profiles"]

        # 1. Check specified profile
        if profile_name:
            profile = profiles.get(profile_name)
            if profile:
                return profile["token"]
            print(f"⚠️ Warning: Profile '{profile_name}' not found")

        # 2. Check environment variable
        env_token = os.getenv("GITHUB_TOKEN")
        if env_token:
            return env_token

        # 3. Check default profile
        default_profile = self.config["github"]["default_profile"]
        if default_profile:
            profile = profiles.get(default_profile)
            if profile:
                return profile["token"]

        # 4. No token available
        return None

    def get_profile_for_token(self, token: str) -> Optional[str]:
        """Return the name of the profile storing *token*, or None."""
        for name, profile in self.config["github"]["profiles"].items():
            if profile["token"] == token:
                return name
        return None

    def get_next_profile(self, current_token: str) -> Optional[tuple]:
        """
        Get next available profile for rate limit switching.

        Returns: (profile_name, token) or None

        When *current_token* does not belong to any profile (for example it
        came from the environment), the first profile is returned, even if
        it is the only one. None is returned when there are no profiles or
        the only profile is the one already in use.
        """
        profiles = list(self.config["github"]["profiles"].items())
        if not profiles:
            return None

        # Find current profile index
        current_idx = None
        for idx, (_, profile) in enumerate(profiles):
            if profile["token"] == current_token:
                current_idx = idx
                break

        if current_idx is None:
            # Current token not in profiles, return first profile
            name, profile = profiles[0]
            return (name, profile["token"])

        if len(profiles) == 1:
            # The only profile is already the active one: nothing to switch to.
            return None

        # Return next profile (circular)
        name, profile = profiles[(current_idx + 1) % len(profiles)]
        return (name, profile["token"])

    def get_rate_limit_strategy(self, token: Optional[str] = None) -> str:
        """Return the rate-limit strategy of *token*'s profile, else "prompt"."""
        if token:
            profile_name = self.get_profile_for_token(token)
            if profile_name:
                profile = self.config["github"]["profiles"][profile_name]
                return profile.get("rate_limit_strategy", "prompt")
        # Default strategy
        return "prompt"

    def get_timeout_minutes(self, token: Optional[str] = None) -> int:
        """Return the timeout of *token*'s profile, else the configured default.

        A profile without its own ``timeout_minutes`` also uses the
        configured ``rate_limit.default_timeout_minutes``.
        """
        default_timeout = self.config["rate_limit"]["default_timeout_minutes"]
        if token:
            profile_name = self.get_profile_for_token(token)
            if profile_name:
                profile = self.config["github"]["profiles"][profile_name]
                return profile.get("timeout_minutes", default_timeout)
        return default_timeout

    # API Keys Management

    def set_api_key(self, provider: str, key: str):
        """Store the API key for a provider (anthropic, google, openai).

        Raises:
            ValueError: If *provider* is not a known provider.
        """
        if provider not in self.config["api_keys"]:
            raise ValueError(f"Unknown provider: {provider}. Use: anthropic, google, openai")
        self.config["api_keys"][provider] = key
        self.save_config()
        print(f"✅ Set {provider.capitalize()} API key")

    def get_api_key(self, provider: str) -> Optional[str]:
        """
        Get API key with environment variable fallback.

        Priority:
        1. Environment variable
        2. Config file
        """
        # Check environment first
        env_var = self._API_KEY_ENV_VARS.get(provider)
        if env_var:
            env_key = os.getenv(env_var)
            if env_key:
                return env_key
        # Check config file
        return self.config["api_keys"].get(provider)

    # Progress Management

    def save_progress(self, job_id: str, progress_data: Dict[str, Any]):
        """Write *progress_data* to the job's progress file (mode 0o600).

        Note: *progress_data* is modified in place - its ``last_updated``
        entry is set to the current time before writing.
        """
        progress_file = self.progress_dir / f"{job_id}.json"
        progress_data["last_updated"] = datetime.now().isoformat()
        with open(progress_file, 'w', encoding='utf-8') as f:
            json.dump(progress_data, f, indent=2)
        # Set file permissions to 600
        progress_file.chmod(stat.S_IRUSR | stat.S_IWUSR)

    def load_progress(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored progress for a job, or None if missing/unreadable."""
        progress_file = self.progress_dir / f"{job_id}.json"
        if not progress_file.exists():
            return None
        try:
            with open(progress_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (ValueError, OSError):
            return None

    def list_resumable_jobs(self) -> List[Dict[str, Any]]:
        """List jobs whose progress file has ``can_resume`` set, newest first.

        Unreadable files and files whose JSON root is not an object are
        skipped. Jobs without a ``last_updated`` value sort last.
        """
        jobs = []
        for progress_file in self.progress_dir.glob("*.json"):
            try:
                with open(progress_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (ValueError, OSError):
                continue
            if not isinstance(data, dict) or not data.get("can_resume", False):
                continue
            jobs.append({
                "job_id": data.get("job_id", progress_file.stem),
                "started_at": data.get("started_at"),
                "command": data.get("command"),
                "progress": data.get("progress", {}),
                "last_updated": data.get("last_updated")
            })
        # Sort by last updated (newest first). "last_updated" is always
        # present in the dict but may be None, which cannot be compared with
        # str - map it to "" so such jobs sort last.
        jobs.sort(key=lambda job: job.get("last_updated") or "", reverse=True)
        return jobs

    def delete_progress(self, job_id: str):
        """Delete progress file for a job (no-op if it does not exist)."""
        progress_file = self.progress_dir / f"{job_id}.json"
        if progress_file.exists():
            progress_file.unlink()

    def cleanup_old_progress(self):
        """Delete progress files whose mtime is older than the configured days."""
        keep_days = self.config["resume"]["keep_progress_days"]
        cutoff_date = datetime.now() - timedelta(days=keep_days)
        deleted_count = 0
        for progress_file in self.progress_dir.glob("*.json"):
            try:
                # Check file modification time
                mtime = datetime.fromtimestamp(progress_file.stat().st_mtime)
                if mtime < cutoff_date:
                    progress_file.unlink()
                    deleted_count += 1
            except OSError:
                # File vanished or cannot be removed; leave it for a later run.
                continue
        if deleted_count > 0:
            print(f"🧹 Cleaned up {deleted_count} old progress file(s)")

    # First Run Experience

    def is_first_run(self) -> bool:
        """Check if this is the first run."""
        return not self.config["first_run"]["completed"]

    def mark_first_run_complete(self):
        """Mark first run as completed and save the configuration."""
        self.config["first_run"]["completed"] = True
        self.save_config()

    def should_show_welcome(self) -> bool:
        """Check if we should show welcome message (flag file absent)."""
        return not self.WELCOME_FLAG.exists()

    def mark_welcome_shown(self):
        """Mark welcome message as shown by creating the flag file."""
        self.WELCOME_FLAG.touch()
        self.WELCOME_FLAG.chmod(stat.S_IRUSR | stat.S_IWUSR)

    # Display Helpers

    def display_config_summary(self):
        """Print paths, GitHub profiles, API-key status, settings and jobs."""
        print("\n📋 Skill Seekers Configuration\n")
        print(f"Config file: {self.config_file}")
        print(f"Progress dir: {self.progress_dir}\n")

        # GitHub profiles
        profiles = self.list_github_profiles()
        print(f"GitHub Profiles: {len(profiles)}")
        if profiles:
            for p in profiles:
                default_marker = " (default)" if p["is_default"] else ""
                print(f"{p['name']}{default_marker}")
                if p["description"]:
                    print(f" {p['description']}")
                print(f" Strategy: {p['strategy']}, Timeout: {p['timeout']}m")
        else:
            print(" (none configured)")
        print()

        # API Keys
        print("API Keys:")
        for provider in ["anthropic", "google", "openai"]:
            key = self.get_api_key(provider)
            status = "✅ Set" if key else "❌ Not set"
            source = ""
            if key:
                # Same lookup table as get_api_key, so both always agree on
                # which environment variable belongs to a provider.
                if os.getenv(self._API_KEY_ENV_VARS[provider]):
                    source = " (from environment)"
                else:
                    source = " (from config)"
            print(f"{provider.capitalize()}: {status}{source}")
        print()

        # Settings
        print("Settings:")
        print(f" • Rate limit timeout: {self.config['rate_limit']['default_timeout_minutes']}m")
        print(f" • Auto-switch profiles: {self.config['rate_limit']['auto_switch_profiles']}")
        print(f" • Keep progress for: {self.config['resume']['keep_progress_days']} days")

        # Resumable jobs
        jobs = self.list_resumable_jobs()
        if jobs:
            print(f"\n📦 Resumable Jobs: {len(jobs)}")
            for job in jobs[:5]:  # Show max 5
                print(f"{job['job_id']}")
                if job.get('progress'):
                    phase = job['progress'].get('phase', 'unknown')
                    print(f" Phase: {phase}, Last: {job['last_updated']}")
# Process-wide ConfigManager, created on first request.
_config_manager = None


def get_config_manager() -> ConfigManager:
    """Return the shared ConfigManager, constructing it on first use."""
    global _config_manager
    if _config_manager is not None:
        return _config_manager
    _config_manager = ConfigManager()
    return _config_manager

View File

@@ -18,6 +18,9 @@ from typing import List, Dict, Optional, Tuple
from collections import Counter
import requests
from .rate_limit_handler import RateLimitHandler, RateLimitError, create_github_headers
from .config_manager import get_config_manager
@dataclass
class CodeStream:
@@ -69,17 +72,37 @@ class GitHubThreeStreamFetcher:
# - three_streams.insights_stream (for issue analyzer)
"""
def __init__(self, repo_url: str, github_token: Optional[str] = None):
def __init__(
self,
repo_url: str,
github_token: Optional[str] = None,
interactive: bool = True,
profile_name: Optional[str] = None
):
"""
Initialize fetcher.
Args:
repo_url: GitHub repository URL (e.g., https://github.com/owner/repo)
github_token: Optional GitHub API token for higher rate limits
interactive: Whether to show interactive prompts (False for CI/CD)
profile_name: Name of the GitHub profile being used
"""
self.repo_url = repo_url
self.github_token = github_token or os.getenv('GITHUB_TOKEN')
self.owner, self.repo = self.parse_repo_url(repo_url)
self.interactive = interactive
# Initialize rate limit handler
config = get_config_manager()
if not profile_name and self.github_token:
profile_name = config.get_profile_for_token(self.github_token)
self.rate_limiter = RateLimitHandler(
token=self.github_token,
interactive=interactive,
profile_name=profile_name
)
def parse_repo_url(self, url: str) -> Tuple[str, str]:
"""
@@ -118,7 +141,14 @@ class GitHubThreeStreamFetcher:
Returns:
ThreeStreamData with all 3 streams
Raises:
RateLimitError: If rate limit cannot be handled
"""
# Check rate limit upfront
if not self.rate_limiter.check_upfront():
raise RateLimitError("Rate limit check failed during startup")
if output_dir is None:
output_dir = Path(tempfile.mkdtemp(prefix='github_fetch_'))
@@ -190,14 +220,20 @@ class GitHubThreeStreamFetcher:
Returns:
Dict with stars, forks, language, open_issues, etc.
Raises:
RateLimitError: If rate limit cannot be handled
"""
url = f"https://api.github.com/repos/{self.owner}/{self.repo}"
headers = {}
if self.github_token:
headers['Authorization'] = f'token {self.github_token}'
headers = create_github_headers(self.github_token)
try:
response = requests.get(url, headers=headers, timeout=10)
# Check for rate limit
if not self.rate_limiter.check_response(response):
raise RateLimitError("Rate limit exceeded and cannot continue")
response.raise_for_status()
data = response.json()
@@ -213,6 +249,8 @@ class GitHubThreeStreamFetcher:
'html_url': data.get('html_url', ''), # NEW: Repository URL
'license': data.get('license', {}) # NEW: License info
}
except RateLimitError:
raise
except Exception as e:
print(f"⚠️ Failed to fetch metadata: {e}")
return {
@@ -258,11 +296,12 @@ class GitHubThreeStreamFetcher:
Returns:
List of issues
Raises:
RateLimitError: If rate limit cannot be handled
"""
url = f"https://api.github.com/repos/{self.owner}/{self.repo}/issues"
headers = {}
if self.github_token:
headers['Authorization'] = f'token {self.github_token}'
headers = create_github_headers(self.github_token)
params = {
'state': state,
@@ -273,6 +312,11 @@ class GitHubThreeStreamFetcher:
try:
response = requests.get(url, headers=headers, params=params, timeout=10)
# Check for rate limit
if not self.rate_limiter.check_response(response):
raise RateLimitError("Rate limit exceeded and cannot continue")
response.raise_for_status()
issues = response.json()
@@ -280,6 +324,8 @@ class GitHubThreeStreamFetcher:
issues = [issue for issue in issues if 'pull_request' not in issue]
return issues
except RateLimitError:
raise
except Exception as e:
print(f"⚠️ Failed to fetch {state} issues: {e}")
return []

View File

@@ -1303,6 +1303,10 @@ Examples:
help='Enhance SKILL.md using Claude Code (no API key needed)')
parser.add_argument('--api-key', type=str,
help='Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)')
parser.add_argument('--non-interactive', action='store_true',
help='Non-interactive mode for CI/CD (fail fast on rate limits)')
parser.add_argument('--profile', type=str,
help='GitHub profile name to use from config')
args = parser.parse_args()
@@ -1310,6 +1314,11 @@ Examples:
if args.config:
with open(args.config, 'r', encoding='utf-8') as f:
config = json.load(f)
# Override with CLI args if provided
if args.non_interactive:
config['interactive'] = False
if args.profile:
config['github_profile'] = args.profile
elif args.repo:
config = {
'repo': args.repo,
@@ -1319,7 +1328,9 @@ Examples:
'include_issues': not args.no_issues,
'include_changelog': not args.no_changelog,
'include_releases': not args.no_releases,
'max_issues': args.max_issues
'max_issues': args.max_issues,
'interactive': not args.non_interactive,
'github_profile': args.profile
}
else:
parser.error('Either --repo or --config is required')

View File

@@ -8,6 +8,7 @@ Usage:
skill-seekers <command> [options]
Commands:
config Configure GitHub tokens, API keys, and settings
scrape Scrape documentation website
github Scrape GitHub repository
pdf Extract from PDF file
@@ -19,6 +20,7 @@ Commands:
estimate Estimate page count before scraping
extract-test-examples Extract usage examples from test files
install-agent Install skill to AI agent directories
resume Resume interrupted scraping job
Examples:
skill-seekers scrape --config configs/react.json
@@ -65,7 +67,7 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
parser.add_argument(
"--version",
action="version",
version="%(prog)s 2.5.1"
version="%(prog)s 2.7.0"
)
subparsers = parser.add_subparsers(
@@ -75,6 +77,33 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
help="Command to run"
)
# === config subcommand ===
config_parser = subparsers.add_parser(
"config",
help="Configure GitHub tokens, API keys, and settings",
description="Interactive configuration wizard"
)
config_parser.add_argument(
"--github",
action="store_true",
help="Go directly to GitHub token setup"
)
config_parser.add_argument(
"--api-keys",
action="store_true",
help="Go directly to API keys setup"
)
config_parser.add_argument(
"--show",
action="store_true",
help="Show current configuration and exit"
)
config_parser.add_argument(
"--test",
action="store_true",
help="Test connections and exit"
)
# === scrape subcommand ===
scrape_parser = subparsers.add_parser(
"scrape",
@@ -105,6 +134,8 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
github_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)")
github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance")
github_parser.add_argument("--non-interactive", action="store_true", help="Non-interactive mode (fail fast on rate limits)")
github_parser.add_argument("--profile", type=str, help="GitHub profile name from config")
# === pdf subcommand ===
pdf_parser = subparsers.add_parser(
@@ -280,6 +311,28 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
help="Preview workflow without executing"
)
# === resume subcommand ===
resume_parser = subparsers.add_parser(
"resume",
help="Resume interrupted scraping job",
description="Continue from saved progress checkpoint"
)
resume_parser.add_argument(
"job_id",
nargs="?",
help="Job ID to resume (or use --list to see available jobs)"
)
resume_parser.add_argument(
"--list",
action="store_true",
help="List all resumable jobs"
)
resume_parser.add_argument(
"--clean",
action="store_true",
help="Clean up old progress files"
)
return parser
@@ -301,7 +354,20 @@ def main(argv: Optional[List[str]] = None) -> int:
# Delegate to the appropriate tool
try:
if args.command == "scrape":
if args.command == "config":
from skill_seekers.cli.config_command import main as config_main
sys.argv = ["config_command.py"]
if args.github:
sys.argv.append("--github")
if args.api_keys:
sys.argv.append("--api-keys")
if args.show:
sys.argv.append("--show")
if args.test:
sys.argv.append("--test")
return config_main() or 0
elif args.command == "scrape":
from skill_seekers.cli.doc_scraper import main as scrape_main
# Convert args namespace to sys.argv format for doc_scraper
sys.argv = ["doc_scraper.py"]
@@ -344,6 +410,10 @@ def main(argv: Optional[List[str]] = None) -> int:
sys.argv.append("--enhance-local")
if args.api_key:
sys.argv.extend(["--api-key", args.api_key])
if args.non_interactive:
sys.argv.append("--non-interactive")
if args.profile:
sys.argv.extend(["--profile", args.profile])
return github_main() or 0
elif args.command == "pdf":
@@ -464,6 +534,17 @@ def main(argv: Optional[List[str]] = None) -> int:
sys.argv.append("--dry-run")
return install_main() or 0
elif args.command == "resume":
from skill_seekers.cli.resume_command import main as resume_main
sys.argv = ["resume_command.py"]
if args.job_id:
sys.argv.append(args.job_id)
if args.list:
sys.argv.append("--list")
if args.clean:
sys.argv.append("--clean")
return resume_main() or 0
else:
print(f"Error: Unknown command '{args.command}'", file=sys.stderr)
parser.print_help()

View File

@@ -0,0 +1,435 @@
"""
Rate Limit Handler for GitHub API
Handles GitHub API rate limits with smart strategies:
- Upfront warnings about token status
- Real-time countdown timers
- Profile switching for multi-token setups
- Progress auto-save on interruption
- Non-interactive mode for CI/CD
"""
import time
import sys
from datetime import datetime, timedelta
from typing import Optional, Dict, Any
import requests
from .config_manager import get_config_manager
class RateLimitError(Exception):
    """Raised when rate limit is exceeded and cannot be handled."""
    pass


class RateLimitHandler:
    """
    Handles GitHub API rate limits with multiple strategies.

    The strategy ("prompt", "wait", "switch" or "fail") and the maximum
    wait time are read from the config manager for the given token; without
    a token the handler falls back to "prompt" and the global default timeout.

    Usage:
        handler = RateLimitHandler(
            token=github_token,
            interactive=True,
            profile_name="personal"
        )

        # Before starting
        handler.check_upfront()

        # Around requests
        response = requests.get(url, headers=headers)
        handler.check_response(response)
    """

    def __init__(
        self,
        token: Optional[str] = None,
        interactive: bool = True,
        profile_name: Optional[str] = None,
        auto_switch: bool = True
    ):
        """
        Initialize rate limit handler.

        Args:
            token: GitHub token (or None for unauthenticated)
            interactive: Whether to show prompts (False for CI/CD)
            profile_name: Name of the profile being used
            auto_switch: Whether to auto-switch profiles when rate limited
        """
        self.token = token
        self.interactive = interactive
        self.profile_name = profile_name
        self.config = get_config_manager()

        # Global settings; auto-switching needs both the caller and the
        # config file to allow it.
        rate_limit_settings = self.config.config["rate_limit"]
        self.auto_switch = auto_switch and rate_limit_settings["auto_switch_profiles"]
        self.show_countdown = rate_limit_settings["show_countdown"]
        self.default_timeout = rate_limit_settings["default_timeout_minutes"]

        # Profile-specific settings are looked up by token when one is given.
        if token:
            self.strategy = self.config.get_rate_limit_strategy(token)
            self.timeout_minutes = self.config.get_timeout_minutes(token)
        else:
            self.strategy = "prompt"
            self.timeout_minutes = self.default_timeout

    def check_upfront(self) -> bool:
        """
        Check rate limit status before starting.

        Shows non-intrusive warning if no token configured. With a token,
        queries the current quota; a failure of that query is reported and
        treated as "proceed", while the outcome of handling an exhausted
        quota is returned (or raised) unchanged.

        Returns:
            True if check passed, False if should abort
        """
        if not self.token:
            print("\n💡 Tip: GitHub API limit is 60 requests/hour without a token.")
            print(" Set up a GitHub token for 5000 requests/hour:")
            print(" $ skill-seekers config --github")
            print()
            if self.interactive:
                response = input("Continue without token? [Y/n]: ").strip().lower()
                if response in ['n', 'no']:
                    print("\n✅ Run 'skill-seekers config --github' to set up a token.\n")
                    return False
            return True

        # Only the status query itself is best-effort. Keeping the handler
        # call outside this try ensures its result is never replaced by
        # "Proceeding anyway".
        try:
            rate_info = self.get_rate_limit_info()
        except Exception as e:
            print(f"⚠️ Could not check rate limit status: {e}")
            print(" Proceeding anyway...")
            return True

        remaining = rate_info.get('remaining', 0)
        limit = rate_info.get('limit', 5000)

        if remaining == 0:
            print(f"\n⚠️ Warning: GitHub rate limit already exhausted (0/{limit})")
            reset_time = rate_info.get('reset_time')
            if reset_time:
                wait_minutes = (reset_time - datetime.now()).total_seconds() / 60
                print(f" Resets in {int(wait_minutes)} minutes")
            if self.interactive:
                return self.handle_rate_limit(rate_info)
            print("\n❌ Cannot proceed: Rate limit exhausted (non-interactive mode)\n")
            return False

        # Show friendly status
        if remaining < 100:
            print(f"⚠️ GitHub API: {remaining}/{limit} requests remaining")
        else:
            print(f"✅ GitHub API: {remaining}/{limit} requests available")
        return True

    def check_response(self, response: "requests.Response") -> bool:
        """
        Check if response indicates rate limit and handle it.

        A response counts as rate limited when its status is 403 or 429 and
        the JSON body's "message" mentions "rate limit". Anything else
        (other statuses, non-JSON bodies, unrelated 403s) is passed through.

        Args:
            response: requests.Response object

        Returns:
            True if handled successfully (or not rate limited),
            False if should abort

        Raises:
            RateLimitError: If rate limit cannot be handled
        """
        if response.status_code not in (403, 429):
            return True

        # Guard only the body parsing: a RateLimitError raised by the handler
        # below must reach the caller instead of being swallowed here.
        try:
            error_data = response.json()
        except ValueError:
            return True  # Body is not JSON, so not a rate limit payload
        if not isinstance(error_data, dict):
            return True

        message = str(error_data.get('message') or '')
        if 'rate limit' not in message.lower():
            return True  # Some other 403/429

        rate_info = self.extract_rate_limit_info(response)
        return self.handle_rate_limit(rate_info)

    def extract_rate_limit_info(self, response: "requests.Response") -> Dict[str, Any]:
        """
        Extract rate limit information from response headers.

        Missing or malformed X-RateLimit-* headers are read as 0, in which
        case 'reset_time' is None.

        Args:
            response: requests.Response with rate limit headers

        Returns:
            Dict with 'limit', 'remaining', 'reset_timestamp', 'reset_time'
        """
        headers = response.headers

        def header_int(name: str) -> int:
            # A bad header value should not abort rate limit handling.
            try:
                return int(headers.get(name, 0))
            except (TypeError, ValueError):
                return 0

        limit = header_int('X-RateLimit-Limit')
        remaining = header_int('X-RateLimit-Remaining')
        reset_timestamp = header_int('X-RateLimit-Reset')
        reset_time = datetime.fromtimestamp(reset_timestamp) if reset_timestamp else None

        return {
            'limit': limit,
            'remaining': remaining,
            'reset_timestamp': reset_timestamp,
            'reset_time': reset_time
        }

    def get_rate_limit_info(self) -> Dict[str, Any]:
        """
        Get current rate limit status from GitHub API.

        Returns:
            Dict with 'limit', 'remaining', 'reset_timestamp', 'reset_time'

        Raises:
            requests.RequestException: On network failure or HTTP error status
        """
        url = "https://api.github.com/rate_limit"
        headers = {}
        if self.token:
            headers['Authorization'] = f'token {self.token}'

        response = requests.get(url, headers=headers, timeout=5)
        response.raise_for_status()
        data = response.json()

        # Prefer resources.core; fall back to the older top-level "rate"
        # object, which carries the same fields.
        core = (data.get('resources') or {}).get('core') or data.get('rate') or {}
        reset_timestamp = core.get('reset', 0)
        reset_time = datetime.fromtimestamp(reset_timestamp) if reset_timestamp else None

        return {
            'limit': core.get('limit', 0),
            'remaining': core.get('remaining', 0),
            'reset_timestamp': reset_timestamp,
            'reset_time': reset_time
        }

    def handle_rate_limit(self, rate_info: Dict[str, Any]) -> bool:
        """
        Handle rate limit based on strategy.

        Order of handling: "fail" aborts, "switch" tries another profile and
        falls through when none is usable, "wait" sleeps until reset, and
        everything else prompts the user when interactive.

        Args:
            rate_info: Dict with rate limit information

        Returns:
            True if handled (can continue), False if should abort

        Raises:
            RateLimitError: If cannot handle in non-interactive mode
        """
        reset_time = rate_info.get('reset_time')
        remaining = rate_info.get('remaining', 0)
        limit = rate_info.get('limit', 0)

        print("\n⚠️ GitHub Rate Limit Reached")
        print(f" Profile: {self.profile_name or 'default'}")
        print(f" Limit: {remaining}/{limit} requests")

        if reset_time:
            wait_seconds = (reset_time - datetime.now()).total_seconds()
            wait_minutes = int(wait_seconds / 60)
            print(f" Resets at: {reset_time.strftime('%H:%M:%S')} ({wait_minutes} minutes)")
        else:
            wait_seconds = 0
            wait_minutes = 0
        print()

        # Strategy-based handling
        if self.strategy == "fail":
            print("❌ Strategy: fail - Aborting immediately")
            if not self.interactive:
                raise RateLimitError("Rate limit exceeded (fail strategy)")
            return False

        if self.strategy == "switch" and self.auto_switch:
            # Try switching to another profile
            if self.try_switch_profile():
                return True
            print("⚠️ No alternative profiles available")
            # Fall through to other strategies

        if self.strategy == "wait":
            # Auto-wait with countdown
            return self.wait_for_reset(wait_seconds, wait_minutes)

        # Default: prompt user (if interactive)
        if self.interactive:
            return self.prompt_user_action(wait_seconds, wait_minutes)

        # Non-interactive mode: fail
        raise RateLimitError("Rate limit exceeded (non-interactive mode)")

    def try_switch_profile(self) -> bool:
        """
        Try to switch to another GitHub profile.

        The candidate token is installed temporarily to query its quota and
        kept only when requests remain; otherwise the previous token is
        restored.

        Returns:
            True if switched successfully, False otherwise
        """
        if not self.token:
            return False

        next_profile_data = self.config.get_next_profile(self.token)
        if not next_profile_data:
            return False

        next_name, next_token = next_profile_data
        print(f"🔄 Switching to profile: {next_name}")

        # Remember the current token before touching it so both failure
        # paths below can restore it.
        old_token = self.token
        try:
            self.token = next_token
            rate_info = self.get_rate_limit_info()
            remaining = rate_info.get('remaining', 0)
            limit = rate_info.get('limit', 0)

            if remaining > 0:
                print(f"✅ Profile '{next_name}' has {remaining}/{limit} requests available")
                self.profile_name = next_name
                return True

            print(f"⚠️ Profile '{next_name}' also exhausted ({remaining}/{limit})")
            self.token = old_token  # Restore old token
            return False
        except Exception as e:
            print(f"❌ Failed to switch profiles: {e}")
            self.token = old_token  # Restore old token
            return False

    def wait_for_reset(self, wait_seconds: float, wait_minutes: int) -> bool:
        """
        Wait for rate limit to reset with countdown.

        Args:
            wait_seconds: Seconds to wait
            wait_minutes: Minutes to wait (for display and timeout check)

        Returns:
            True if waited successfully, False if the wait exceeds the
            configured timeout or the user interrupts it
        """
        # Check timeout
        if wait_minutes > self.timeout_minutes:
            print(f"⚠️ Wait time ({wait_minutes}m) exceeds timeout ({self.timeout_minutes}m)")
            return False

        if wait_seconds <= 0:
            print("✅ Rate limit should be reset now")
            return True

        print(f"⏳ Waiting {wait_minutes} minutes for rate limit reset...")
        print(" Press Ctrl+C to cancel\n")

        try:
            if self.show_countdown:
                self.show_countdown_timer(wait_seconds)
            else:
                time.sleep(wait_seconds)
            print("\n✅ Rate limit reset! Continuing...\n")
            return True
        except KeyboardInterrupt:
            print("\n\n⏸️ Wait interrupted by user")
            return False

    def show_countdown_timer(self, total_seconds: float):
        """
        Show a live countdown timer.

        Rewrites a single terminal line once per second until the deadline
        passes, then clears that line.

        Args:
            total_seconds: Total seconds to count down
        """
        end_time = time.time() + total_seconds

        while time.time() < end_time:
            remaining = int(end_time - time.time())
            minutes, seconds = divmod(remaining, 60)

            # Print countdown on same line
            sys.stdout.write(f"\r⏱️ Resuming in {minutes:02d}:{seconds:02d}...")
            sys.stdout.flush()
            time.sleep(1)

        sys.stdout.write("\r" + " " * 50 + "\r")  # Clear line
        sys.stdout.flush()

    def prompt_user_action(self, wait_seconds: float, wait_minutes: int) -> bool:
        """
        Prompt user for action when rate limited.

        Loops until a valid choice is entered. The switch option is only
        listed when another profile is available, but 's' is accepted
        regardless and simply reports failure when no switch is possible.

        Args:
            wait_seconds: Seconds until reset
            wait_minutes: Minutes until reset

        Returns:
            True if user chooses to continue, False to abort
        """
        print("Options:")
        print(f" [w] Wait {wait_minutes} minutes (auto-continues)")

        # Check if profile switching is available
        if self.token and self.config.get_next_profile(self.token):
            print(" [s] Switch to another GitHub profile")

        print(" [t] Set up new GitHub token")
        print(" [c] Cancel")
        print()

        while True:
            choice = input("Select an option [w/s/t/c]: ").strip().lower()

            if choice == 'w':
                return self.wait_for_reset(wait_seconds, wait_minutes)
            elif choice == 's':
                if self.try_switch_profile():
                    return True
                print("⚠️ Profile switching failed. Choose another option.")
                continue
            elif choice == 't':
                print("\n💡 Opening GitHub token setup...")
                print(" Run this command in another terminal:")
                print(" $ skill-seekers config --github\n")
                print(" Then restart your scraping job.\n")
                return False
            elif choice == 'c':
                print("\n⏸️ Operation cancelled by user\n")
                return False
            else:
                print("❌ Invalid choice. Please enter w, s, t, or c.")
def create_github_headers(token: Optional[str] = None) -> Dict[str, str]:
    """
    Build the request headers for a GitHub API call.

    Args:
        token: GitHub token; None or an empty string means unauthenticated

    Returns:
        A dict holding an 'Authorization' header when a token is given,
        otherwise an empty dict
    """
    return {'Authorization': f'token {token}'} if token else {}

View File

@@ -0,0 +1,174 @@
"""
Resume Command for Skill Seekers
Allows users to resume interrupted scraping jobs from saved progress.
"""
import sys
import argparse
from typing import Optional
from .config_manager import get_config_manager
def list_resumable_jobs():
    """Print a numbered summary of every job the config manager reports as resumable."""
    resumable = get_config_manager().list_resumable_jobs()

    if not resumable:
        # Nothing saved: explain when progress files get created.
        for line in (
            "\n📦 No resumable jobs found.\n",
            "Jobs are automatically saved when:",
            " • You interrupt a scraping operation (Ctrl+C)",
            " • A rate limit is reached",
            " • An error occurs during scraping\n",
        ):
            print(line)
        return

    print(f"\n📦 Resumable Jobs ({len(resumable)} available):\n")

    number = 0
    for entry in resumable:
        number += 1
        identifier = entry["job_id"]  # the only required key
        details = entry.get("progress", {})

        print(f"{number}. Job ID: {identifier}")
        print(f" Started: {entry.get('started_at', 'Unknown')}")
        print(f" Command: {entry.get('command', 'Unknown')}")

        if details:
            print(f" Progress: {details.get('phase', 'Unknown')}")
            done = details.get("files_processed", 0)
            total = details.get("files_total", 0)
            # The file line is only shown when a total is known.
            if total > 0:
                share = (done / total) * 100
                print(f" Files: {done}/{total} ({share:.1f}%)")

        print(f" Last updated: {entry.get('last_updated', 'Unknown')}")
        print()

    print("To resume a job:")
    print(" $ skill-seekers resume <job_id>\n")
def resume_job(job_id: str) -> int:
    """
    Resume a specific job.

    Loads the saved progress for ``job_id`` and reports what would be
    resumed. Actual resumption is not implemented for any job type yet, so
    every path currently returns a non-zero exit code.

    Args:
        job_id: Identifier of the saved job

    Returns:
        Exit code (always 1 for now)
    """
    config = get_config_manager()
    print(f"\n🔄 Resuming job: {job_id}\n")

    # Load progress
    progress = config.load_progress(job_id)
    if not progress:
        print(f"❌ Job '{job_id}' not found or cannot be resumed.\n")
        print("Use 'skill-seekers resume --list' to see available jobs.\n")
        return 1

    if not progress.get("can_resume", False):
        print(f"❌ Job '{job_id}' is not marked as resumable.\n")
        return 1

    # dict.get's default only covers a missing key; "or" also covers a
    # stored null, which would otherwise break the lookups below.
    command = progress.get("command") or ""
    checkpoint = (progress.get("progress") or {}).get("last_checkpoint")

    print(f"Original command: {command}")
    print(f"Last checkpoint: {checkpoint or 'Unknown'}")
    print()

    # Checked in order, so "github" wins over "scrape" for a command that
    # contains both words.
    job_kinds = (
        ("github",
         "📌 Resuming GitHub scraping...",
         "⚠️ Note: GitHub resume feature not yet implemented"),
        ("scrape",
         "📌 Resuming documentation scraping...",
         "⚠️ Note: Documentation scraping resume feature not yet implemented"),
        ("unified",
         "📌 Resuming unified scraping...",
         "⚠️ Note: Unified scraping resume feature not yet implemented"),
    )
    for keyword, banner, note in job_kinds:
        if keyword in command:
            print(banner)
            print(note)
            print(" You can re-run the original command - it will use cached data where available.\n")
            print(f" Command: {command}\n")
            return 1

    print("❌ Unknown job type. Cannot resume.\n")
    return 1
def clean_old_jobs():
    """Run the config manager's progress cleanup and report how many jobs it removed."""
    manager = get_config_manager()
    print("\n🧹 Cleaning up old progress files...\n")

    # Count before and after: the cleanup call's return value is not used.
    count_before = len(manager.list_resumable_jobs())
    manager.cleanup_old_progress()
    count_after = len(manager.list_resumable_jobs())

    removed = count_before - count_after
    print(f"✅ Deleted {removed} old job(s)" if removed > 0 else "✅ No old jobs to clean up")

    # An empty string prints the same bare newline as print().
    print(f"📦 {count_after} job(s) remaining\n" if count_after > 0 else "")
def main():
    """
    Entry point of the resume command.

    Parses sys.argv and dispatches: --list wins over --clean, which wins
    over resuming the given job id.

    Returns:
        0 after listing or cleaning, 1 when no job id was given, otherwise
        the result of resume_job()
    """
    cli = argparse.ArgumentParser(description="Resume interrupted Skill Seekers jobs")
    cli.add_argument("job_id", nargs="?", help="Job ID to resume")
    for flag, description in (
        ("--list", "List all resumable jobs"),
        ("--clean", "Clean up old progress files"),
    ):
        cli.add_argument(flag, action="store_true", help=description)

    options = cli.parse_args()

    if options.list:
        list_resumable_jobs()
        return 0

    if options.clean:
        clean_old_jobs()
        return 0

    if options.job_id:
        return resume_job(options.job_id)

    # No flag and no job id: show usage and signal failure.
    print("\n❌ Error: Job ID required or use --list to see available jobs\n")
    cli.print_help()
    return 1
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)