import json
from pathlib import Path


def find_configs_directory():
    """Locate the directory that holds the available config JSON files.

    Two candidate locations are checked, in order, relative to the directory
    two levels above the package root (``Path(__file__).parent.parent``):

    1. ``api/configs_repo/official``
    2. ``configs``

    Returns:
        Path of the first candidate that is an existing directory, or
        ``None`` when neither candidate is.
    """
    # Package root, e.g. src/skill_seekers/ when this file lives in cli/.
    package_root = Path(__file__).parent.parent
    base_dir = package_root.parent.parent

    candidates = (
        base_dir / "api" / "configs_repo" / "official",
        base_dir / "configs",
    )
    for candidate in candidates:
        # is_dir() rather than exists(): a regular file with the same name
        # cannot be searched for configs.
        if candidate.is_dir():
            return candidate

    return None


def _summarize_config(config_file, rel_path, max_description=60):
    """Build the listing entry (file, path, name, description) for one config.

    A file that cannot be read or parsed still yields an entry; its
    description then carries the error so the listing never aborts halfway.
    """
    entry = {
        "file": config_file.name,
        "path": str(rel_path),
        "name": config_file.stem,
        "description": "No description",
    }

    try:
        # Explicit encoding: JSON configs are UTF-8 regardless of the
        # platform's default locale encoding.
        with open(config_file, "r", encoding="utf-8") as f:
            config_data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        entry["description"] = f"⚠️ Error loading config: {e}"
        return entry

    if not isinstance(config_data, dict):
        entry["description"] = (
            "⚠️ Error loading config: top-level JSON value is not an object"
        )
        return entry

    entry["name"] = config_data.get("name", config_file.stem)

    description = config_data.get("description", "No description")
    if not isinstance(description, str):
        description = str(description)
    # Keep the listing compact: cap the description at max_description chars.
    if len(description) > max_description:
        description = description[:max_description - 3] + "..."
    entry["description"] = description

    return entry


def list_all_configs(config_dir=None):
    """Print every ``*.json`` config found under the config directory.

    Configs are searched recursively and grouped by the first path component
    below the config directory; files directly inside it are grouped under
    ``root``.

    Args:
        config_dir: Optional directory to list. When omitted, the directory
            returned by ``find_configs_directory()`` is used.

    Returns:
        0 when at least one config was listed, 1 when no config directory or
        no config file was found.
    """
    if config_dir is None:
        config_dir = find_configs_directory()
        if config_dir is None:
            print("❌ Error: No config directory found")
            print(" Tried: api/configs_repo/official/ and configs/")
            return 1
    else:
        config_dir = Path(config_dir)
        if not config_dir.is_dir():
            print(f"❌ Error: Config directory not found: {config_dir}")
            return 1

    print()
    print("=" * 70)
    print("📋 AVAILABLE CONFIGS")
    print("=" * 70)
    print()
    print(f"📁 Config directory: {config_dir}")
    print()

    # Find all JSON files recursively
    config_files = sorted(config_dir.rglob("*.json"))

    if not config_files:
        print("⚠️ No config files found")
        return 1

    # Group by category (first subdirectory, or "root" for top-level files)
    by_category = {}
    for config_file in config_files:
        rel_path = config_file.relative_to(config_dir)
        category = rel_path.parts[0] if len(rel_path.parts) > 1 else "root"
        by_category.setdefault(category, []).append(
            _summarize_config(config_file, rel_path)
        )

    # Print configs by category
    total = 0
    for category in sorted(by_category):
        configs = by_category[category]
        total += len(configs)

        print(f"📦 {category.upper()}")
        print("-" * 70)

        for config in configs:
            print(f" • {config['name']}")
            print(f" File: {config['path']}")
            print(f" Description: {config['description']}")
            print()

    print("=" * 70)
    print(f"📊 Total: {total} configs found")
    print("=" * 70)
    print()

    return 0
class TestEstimatePagesCLI(unittest.TestCase):
    """CLI tests for the ``--all`` flag of the estimate command."""

    # Methods below are the ones this change adds to TestEstimatePagesCLI.

    def _run_all_flag(self, command, skip_message):
        """Run *command* and return its stdout after checking it exited 0.

        The test is skipped with *skip_message* when the executable is not
        installed. Only the subprocess call sits inside the ``try`` so that
        assertion failures can never be mistaken for a missing command.
        """
        import subprocess

        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=10
            )
        except FileNotFoundError:
            self.skipTest(skip_message)

        # Include the captured streams so a non-zero exit is diagnosable.
        self.assertEqual(
            result.returncode, 0,
            f"{' '.join(command)} exited with {result.returncode}\n"
            f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
        )
        return result.stdout

    def test_cli_all_flag_lists_configs(self):
        """Test that --all flag lists all available configs"""
        output = self._run_all_flag(
            ['skill-seekers', 'estimate', '--all'],
            "skill-seekers command not installed"
        )

        # Should contain expected output
        self.assertIn('AVAILABLE CONFIGS', output)
        self.assertIn('Total:', output)
        self.assertIn('configs found', output)

        # Should list some known configs
        # (these should exist in api/configs_repo/official/)
        lowered = output.lower()
        self.assertTrue(
            any(known in lowered for known in ('react', 'django', 'godot')),
            "Expected at least one known config name in output"
        )

    def test_cli_all_flag_with_direct_entry_point(self):
        """Test --all flag works with skill-seekers-estimate entry point"""
        output = self._run_all_flag(
            ['skill-seekers-estimate', '--all'],
            "skill-seekers-estimate command not installed"
        )

        # Should show available configs
        self.assertIn('AVAILABLE CONFIGS', output)