change max length

This commit is contained in:
Pablo Estevez
2026-01-17 17:48:15 +00:00
parent 97e597d9db
commit c33c6f9073
118 changed files with 3546 additions and 960 deletions

View File

@@ -274,19 +274,24 @@ class ConfigAnalyzer:
# Add source type tags
if "base_url" in config_data or (
config_type == "unified" and any(s.get("type") == "documentation" for s in config_data.get("sources", []))
config_type == "unified"
and any(s.get("type") == "documentation" for s in config_data.get("sources", []))
):
tags.add("documentation")
if "repo" in config_data or (
config_type == "unified" and any(s.get("type") == "github" for s in config_data.get("sources", []))
config_type == "unified"
and any(s.get("type") == "github" for s in config_data.get("sources", []))
):
tags.add("github")
if (
"pdf" in config_data
or "pdf_url" in config_data
or (config_type == "unified" and any(s.get("type") == "pdf" for s in config_data.get("sources", [])))
or (
config_type == "unified"
and any(s.get("type") == "pdf" for s in config_data.get("sources", []))
)
):
tags.add("pdf")

View File

@@ -58,7 +58,9 @@ async def root():
@app.get("/api/configs")
async def list_configs(category: str | None = None, tag: str | None = None, type: str | None = None) -> dict[str, Any]:
async def list_configs(
category: str | None = None, tag: str | None = None, type: str | None = None
) -> dict[str, Any]:
"""
List all available configs with metadata

View File

@@ -46,7 +46,13 @@ print()
print("**By Type:**")
for conflict_type, count in summary["by_type"].items():
if count > 0:
emoji = "📖" if conflict_type == "missing_in_docs" else "💻" if conflict_type == "missing_in_code" else "⚠️"
emoji = (
"📖"
if conflict_type == "missing_in_docs"
else "💻"
if conflict_type == "missing_in_code"
else "⚠️"
)
print(f" {emoji} {conflict_type}: {count}")
print()
@@ -86,10 +92,14 @@ if high:
if conflict["code_info"]:
print("\n**Implemented as**:")
params = conflict["code_info"].get("parameters", [])
param_str = ", ".join(f"{p['name']}: {p.get('type_hint', 'Any')}" for p in params if p["name"] != "self")
param_str = ", ".join(
f"{p['name']}: {p.get('type_hint', 'Any')}" for p in params if p["name"] != "self"
)
print(f" Signature: {conflict['code_info']['name']}({param_str})")
print(f" Return type: {conflict['code_info'].get('return_type', 'None')}")
print(f" Location: {conflict['code_info'].get('source', 'N/A')}:{conflict['code_info'].get('line', '?')}")
print(
f" Location: {conflict['code_info'].get('source', 'N/A')}:{conflict['code_info'].get('line', '?')}"
)
print()
# Show medium severity

View File

@@ -171,7 +171,7 @@ exclude_lines = [
]
[tool.ruff]
line-length = 120
line-length = 100
target-version = "py310"
src = ["src", "tests"]

View File

@@ -67,7 +67,9 @@ def get_adaptor(platform: str, config: dict = None) -> SkillAdaptor:
if platform not in ADAPTORS:
available = ", ".join(ADAPTORS.keys())
if not ADAPTORS:
raise ValueError(f"No adaptors are currently implemented. Platform '{platform}' is not available.")
raise ValueError(
f"No adaptors are currently implemented. Platform '{platform}' is not available."
)
raise ValueError(
f"Platform '{platform}' is not supported or not yet implemented. Available platforms: {available}"
)

View File

@@ -167,14 +167,28 @@ version: {metadata.version}
# Validate ZIP file
package_path = Path(package_path)
if not package_path.exists():
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"File not found: {package_path}",
}
if not package_path.suffix == ".zip":
return {"success": False, "skill_id": None, "url": None, "message": f"Not a ZIP file: {package_path}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Not a ZIP file: {package_path}",
}
# Prepare API request
api_url = self.DEFAULT_API_ENDPOINT
headers = {"x-api-key": api_key, "anthropic-version": "2023-06-01", "anthropic-beta": "skills-2025-10-02"}
headers = {
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
"anthropic-beta": "skills-2025-10-02",
}
timeout = kwargs.get("timeout", 60)
@@ -231,7 +245,12 @@ version: {metadata.version}
except:
error_msg = f"HTTP {response.status_code}"
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {error_msg}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Upload failed: {error_msg}",
}
except requests.exceptions.Timeout:
return {
@@ -250,7 +269,12 @@ version: {metadata.version}
}
except Exception as e:
return {"success": False, "skill_id": None, "url": None, "message": f"Unexpected error: {str(e)}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Unexpected error: {str(e)}",
}
def validate_api_key(self, api_key: str) -> bool:
"""
@@ -363,7 +387,9 @@ version: {metadata.version}
print(f"❌ Error calling Claude API: {e}")
return False
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]:
def _read_reference_files(
self, references_dir: Path, max_chars: int = 200000
) -> dict[str, str]:
"""
Read reference markdown files from skill directory.

View File

@@ -169,10 +169,20 @@ See the references directory for complete documentation with examples and best p
# Validate package file FIRST
package_path = Path(package_path)
if not package_path.exists():
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"File not found: {package_path}",
}
if not package_path.suffix == ".gz":
return {"success": False, "skill_id": None, "url": None, "message": f"Not a tar.gz file: {package_path}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Not a tar.gz file: {package_path}",
}
# Check for google-generativeai library
try:
@@ -210,7 +220,9 @@ See the references directory for complete documentation with examples and best p
}
# Upload to Files API
uploaded_file = genai.upload_file(path=str(main_file), display_name=f"{package_path.stem}_instructions")
uploaded_file = genai.upload_file(
path=str(main_file), display_name=f"{package_path.stem}_instructions"
)
# Upload reference files (if any)
refs_dir = temp_path / "references"
@@ -230,7 +242,12 @@ See the references directory for complete documentation with examples and best p
}
except Exception as e:
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {str(e)}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Upload failed: {str(e)}",
}
def validate_api_key(self, api_key: str) -> bool:
"""
@@ -337,7 +354,9 @@ See the references directory for complete documentation with examples and best p
print(f"❌ Error calling Gemini API: {e}")
return False
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]:
def _read_reference_files(
self, references_dir: Path, max_chars: int = 200000
) -> dict[str, str]:
"""
Read reference markdown files from skill directory.

View File

@@ -185,10 +185,20 @@ Always prioritize accuracy by consulting the attached documentation files before
# Validate package file FIRST
package_path = Path(package_path)
if not package_path.exists():
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"File not found: {package_path}",
}
if not package_path.suffix == ".zip":
return {"success": False, "skill_id": None, "url": None, "message": f"Not a ZIP file: {package_path}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Not a ZIP file: {package_path}",
}
# Check for openai library
try:
@@ -254,7 +264,9 @@ Always prioritize accuracy by consulting the attached documentation files before
# Attach files to vector store
if file_ids:
client.beta.vector_stores.files.create_batch(vector_store_id=vector_store.id, file_ids=file_ids)
client.beta.vector_stores.files.create_batch(
vector_store_id=vector_store.id, file_ids=file_ids
)
# Create assistant
assistant = client.beta.assistants.create(
@@ -273,7 +285,12 @@ Always prioritize accuracy by consulting the attached documentation files before
}
except Exception as e:
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {str(e)}"}
return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Upload failed: {str(e)}",
}
def validate_api_key(self, api_key: str) -> bool:
"""
@@ -389,7 +406,9 @@ Always prioritize accuracy by consulting the attached documentation files before
print(f"❌ Error calling OpenAI API: {e}")
return False
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]:
def _read_reference_files(
self, references_dir: Path, max_chars: int = 200000
) -> dict[str, str]:
"""
Read reference markdown files from skill directory.

View File

@@ -66,7 +66,9 @@ class AIEnhancer:
self.mode = "disabled"
self.enabled = False
logger.info(" AI enhancement disabled (no API key found)")
logger.info(" Set ANTHROPIC_API_KEY to enable, or use 'skill-seekers enhance' for SKILL.md")
logger.info(
" Set ANTHROPIC_API_KEY to enable, or use 'skill-seekers enhance' for SKILL.md"
)
return
if self.mode == "api" and self.enabled:
@@ -86,7 +88,9 @@ class AIEnhancer:
# LOCAL mode requires Claude Code to be available
# For patterns/examples, this is less practical than API mode
logger.info(" LOCAL mode not yet supported for pattern/example enhancement")
logger.info(" Use API mode (set ANTHROPIC_API_KEY) or 'skill-seekers enhance' for SKILL.md")
logger.info(
" Use API mode (set ANTHROPIC_API_KEY) or 'skill-seekers enhance' for SKILL.md"
)
self.enabled = False
def _call_claude(self, prompt: str, max_tokens: int = 1000) -> str | None:
@@ -96,7 +100,9 @@ class AIEnhancer:
try:
response = self.client.messages.create(
model="claude-sonnet-4-20250514", max_tokens=max_tokens, messages=[{"role": "user", "content": prompt}]
model="claude-sonnet-4-20250514",
max_tokens=max_tokens,
messages=[{"role": "user", "content": prompt}],
)
return response.content[0].text
except Exception as e:

View File

@@ -94,7 +94,9 @@ class APIReferenceBuilder:
name_without_ext = basename.rsplit(".", 1)[0] if "." in basename else basename
return f"{name_without_ext}.md"
def _generate_file_reference(self, file_data: dict[str, Any], source_file: str, language: str) -> str:
def _generate_file_reference(
self, file_data: dict[str, Any], source_file: str, language: str
) -> str:
"""
Generate complete markdown reference for a single file.
@@ -334,7 +336,9 @@ def main():
"""
import argparse
parser = argparse.ArgumentParser(description="Generate API reference from code analysis results")
parser = argparse.ArgumentParser(
description="Generate API reference from code analysis results"
)
parser.add_argument("input_file", help="Code analysis JSON file")
parser.add_argument("output_dir", help="Output directory for markdown files")

View File

@@ -197,7 +197,9 @@ class ArchitecturalPatternDetector:
return detected
def _detect_mvc(self, dirs: dict[str, int], files: list[dict], frameworks: list[str]) -> list[ArchitecturalPattern]:
def _detect_mvc(
self, dirs: dict[str, int], files: list[dict], frameworks: list[str]
) -> list[ArchitecturalPattern]:
"""Detect MVC pattern"""
patterns = []
@@ -226,7 +228,9 @@ class ArchitecturalPatternDetector:
if len(components["Views"]) == 1:
evidence.append("Views directory with view files")
if "controller" in file_path and ("controllers/" in file_path or "/controller/" in file_path):
if "controller" in file_path and (
"controllers/" in file_path or "/controller/" in file_path
):
components["Controllers"].append(file.get("file", ""))
if len(components["Controllers"]) == 1:
evidence.append("Controllers directory with controller classes")
@@ -288,11 +292,15 @@ class ArchitecturalPatternDetector:
if "view" in file_path:
components["Views"].append(file.get("file", ""))
if "viewmodel" in file_path or any("viewmodel" in c.get("name", "").lower() for c in classes):
if "viewmodel" in file_path or any(
"viewmodel" in c.get("name", "").lower() for c in classes
):
components["ViewModels"].append(file.get("file", ""))
if len(components["ViewModels"]) >= 2:
evidence.append(f"ViewModels directory with {len(components['ViewModels'])} ViewModel classes")
evidence.append(
f"ViewModels directory with {len(components['ViewModels'])} ViewModel classes"
)
if len(components["Views"]) >= 2:
evidence.append(f"Views directory with {len(components['Views'])} view files")
@@ -329,7 +337,9 @@ class ArchitecturalPatternDetector:
return patterns
def _detect_repository(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]:
def _detect_repository(
self, dirs: dict[str, int], files: list[dict]
) -> list[ArchitecturalPattern]:
"""Detect Repository pattern"""
patterns = []
@@ -352,7 +362,9 @@ class ArchitecturalPatternDetector:
components["Repositories"].append(file.get("file", ""))
if len(components["Repositories"]) >= 2:
evidence.append(f"Repository pattern: {len(components['Repositories'])} repository classes")
evidence.append(
f"Repository pattern: {len(components['Repositories'])} repository classes"
)
evidence.append("Repositories abstract data access logic")
patterns.append(
@@ -367,7 +379,9 @@ class ArchitecturalPatternDetector:
return patterns
def _detect_service_layer(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]:
def _detect_service_layer(
self, dirs: dict[str, int], files: list[dict]
) -> list[ArchitecturalPattern]:
"""Detect Service Layer pattern"""
patterns = []
@@ -404,7 +418,9 @@ class ArchitecturalPatternDetector:
return patterns
def _detect_layered_architecture(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]:
def _detect_layered_architecture(
self, dirs: dict[str, int], files: list[dict]
) -> list[ArchitecturalPattern]:
"""Detect Layered Architecture (3-tier, N-tier)"""
patterns = []
@@ -444,7 +460,9 @@ class ArchitecturalPatternDetector:
return patterns
def _detect_clean_architecture(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]:
def _detect_clean_architecture(
self, dirs: dict[str, int], files: list[dict]
) -> list[ArchitecturalPattern]:
"""Detect Clean Architecture"""
patterns = []

View File

@@ -150,7 +150,9 @@ class CodeAnalyzer:
is_method = any(
isinstance(parent, ast.ClassDef)
for parent in ast.walk(tree)
if hasattr(parent, "body") and isinstance(parent.body, list) and node in parent.body
if hasattr(parent, "body")
and isinstance(parent.body, list)
and node in parent.body
)
except (TypeError, AttributeError):
# If body is not iterable or check fails, assume it's a top-level function
@@ -173,7 +175,9 @@ class CodeAnalyzer:
if isinstance(base, ast.Name):
bases.append(base.id)
elif isinstance(base, ast.Attribute):
bases.append(f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr)
bases.append(
f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr
)
# Extract methods
methods = []
@@ -186,7 +190,11 @@ class CodeAnalyzer:
docstring = ast.get_docstring(node)
return ClassSignature(
name=node.name, base_classes=bases, methods=methods, docstring=docstring, line_number=node.lineno
name=node.name,
base_classes=bases,
methods=methods,
docstring=docstring,
line_number=node.lineno,
)
def _extract_python_function(self, node, is_method: bool = False) -> FunctionSignature:
@@ -209,7 +217,9 @@ class CodeAnalyzer:
param_idx = num_no_default + i
if param_idx < len(params):
try:
params[param_idx].default = ast.unparse(default) if hasattr(ast, "unparse") else str(default)
params[param_idx].default = (
ast.unparse(default) if hasattr(ast, "unparse") else str(default)
)
except:
params[param_idx].default = "..."
@@ -719,7 +729,9 @@ class CodeAnalyzer:
# Distinguish XML doc comments (///)
comment_type = "doc" if match.group(1).startswith("/") else "inline"
comments.append({"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type})
comments.append(
{"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type}
)
# Multi-line comments (/* */)
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
@@ -1325,9 +1337,7 @@ class CodeAnalyzer:
"""Extract PHP method signatures from class body."""
methods = []
method_pattern = (
r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
)
method_pattern = r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
for match in re.finditer(method_pattern, class_body):
method_name = match.group(1)
params_str = match.group(2)
@@ -1445,7 +1455,8 @@ def create_sprite(texture: str) -> Node2D:
for method in cls["methods"]:
params = ", ".join(
[
f"{p['name']}: {p['type_hint']}" + (f" = {p['default']}" if p.get("default") else "")
f"{p['name']}: {p['type_hint']}"
+ (f" = {p['default']}" if p.get("default") else "")
for p in method["parameters"]
]
)

View File

@@ -301,7 +301,11 @@ def analyze_codebase(
# Only include files with actual analysis results
if analysis and (analysis.get("classes") or analysis.get("functions")):
results["files"].append(
{"file": str(file_path.relative_to(directory)), "language": language, **analysis}
{
"file": str(file_path.relative_to(directory)),
"language": language,
**analysis,
}
)
analyzed_count += 1
@@ -441,7 +445,10 @@ def analyze_codebase(
# Create extractor
test_extractor = TestExampleExtractor(
min_confidence=0.5, max_per_file=10, languages=languages, enhance_with_ai=enhance_with_ai
min_confidence=0.5,
max_per_file=10,
languages=languages,
enhance_with_ai=enhance_with_ai,
)
# Extract examples from directory
@@ -487,7 +494,11 @@ def analyze_codebase(
tutorials_dir = output_dir / "tutorials"
# Get workflow examples from the example_report if available
if "example_report" in locals() and example_report and example_report.total_examples > 0:
if (
"example_report" in locals()
and example_report
and example_report.total_examples > 0
):
# Convert example_report to list of dicts for processing
examples_list = example_report.to_dict().get("examples", [])
@@ -565,7 +576,9 @@ def analyze_codebase(
if "ai_enhancements" in result_dict:
insights = result_dict["ai_enhancements"].get("overall_insights", {})
if insights.get("security_issues_found"):
logger.info(f"🔐 Security issues found: {insights['security_issues_found']}")
logger.info(
f"🔐 Security issues found: {insights['security_issues_found']}"
)
logger.info(f"📁 Saved to: {config_output}")
else:
@@ -741,10 +754,14 @@ Use this skill when you need to:
refs_added = False
if build_api_reference and (output_dir / "api_reference").exists():
skill_content += "- **API Reference**: `references/api_reference/` - Complete API documentation\n"
skill_content += (
"- **API Reference**: `references/api_reference/` - Complete API documentation\n"
)
refs_added = True
if build_dependency_graph and (output_dir / "dependencies").exists():
skill_content += "- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
skill_content += (
"- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
)
refs_added = True
if detect_patterns and (output_dir / "patterns").exists():
skill_content += "- **Patterns**: `references/patterns/` - Detected design patterns\n"
@@ -753,7 +770,9 @@ Use this skill when you need to:
skill_content += "- **Examples**: `references/test_examples/` - Usage examples from tests\n"
refs_added = True
if extract_config_patterns and (output_dir / "config_patterns").exists():
skill_content += "- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
skill_content += (
"- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
)
refs_added = True
if (output_dir / "architecture").exists():
skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n"
@@ -1057,12 +1076,21 @@ Examples:
)
parser.add_argument("--directory", required=True, help="Directory to analyze")
parser.add_argument("--output", default="output/codebase/", help="Output directory (default: output/codebase/)")
parser.add_argument(
"--depth", choices=["surface", "deep", "full"], default="deep", help="Analysis depth (default: deep)"
"--output", default="output/codebase/", help="Output directory (default: output/codebase/)"
)
parser.add_argument(
"--depth",
choices=["surface", "deep", "full"],
default="deep",
help="Analysis depth (default: deep)",
)
parser.add_argument(
"--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)"
)
parser.add_argument(
"--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)"
)
parser.add_argument("--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)")
parser.add_argument("--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)")
parser.add_argument(
"--skip-api-reference",
action="store_true",

View File

@@ -320,9 +320,11 @@ def api_keys_menu():
if key:
import os
env_var = {"anthropic": "ANTHROPIC_API_KEY", "google": "GOOGLE_API_KEY", "openai": "OPENAI_API_KEY"}[
provider
]
env_var = {
"anthropic": "ANTHROPIC_API_KEY",
"google": "GOOGLE_API_KEY",
"openai": "OPENAI_API_KEY",
}[provider]
if os.getenv(env_var):
source = " (from environment)"
else:
@@ -389,7 +391,9 @@ def rate_limit_settings():
print(f" • Show countdown: {current['show_countdown']}\n")
# Timeout
timeout_input = input(f"Default timeout in minutes [{current['default_timeout_minutes']}]: ").strip()
timeout_input = input(
f"Default timeout in minutes [{current['default_timeout_minutes']}]: "
).strip()
if timeout_input:
try:
config.config["rate_limit"]["default_timeout_minutes"] = int(timeout_input)
@@ -398,13 +402,17 @@ def rate_limit_settings():
# Auto-switch
auto_switch_input = (
input(f"Auto-switch to other profiles? [y/n] ({current['auto_switch_profiles']}): ").strip().lower()
input(f"Auto-switch to other profiles? [y/n] ({current['auto_switch_profiles']}): ")
.strip()
.lower()
)
if auto_switch_input:
config.config["rate_limit"]["auto_switch_profiles"] = auto_switch_input in ["y", "yes"]
# Show countdown
countdown_input = input(f"Show countdown timer? [y/n] ({current['show_countdown']}): ").strip().lower()
countdown_input = (
input(f"Show countdown timer? [y/n] ({current['show_countdown']}): ").strip().lower()
)
if countdown_input:
config.config["rate_limit"]["show_countdown"] = countdown_input in ["y", "yes"]
@@ -427,7 +435,9 @@ def resume_settings():
print(f" • Keep progress for: {current['keep_progress_days']} days\n")
# Auto-save interval
interval_input = input(f"Auto-save interval in seconds [{current['auto_save_interval_seconds']}]: ").strip()
interval_input = input(
f"Auto-save interval in seconds [{current['auto_save_interval_seconds']}]: "
).strip()
if interval_input:
try:
config.config["resume"]["auto_save_interval_seconds"] = int(interval_input)
@@ -435,7 +445,9 @@ def resume_settings():
print("⚠️ Invalid input, keeping current value")
# Keep days
days_input = input(f"Keep progress for how many days [{current['keep_progress_days']}]: ").strip()
days_input = input(
f"Keep progress for how many days [{current['keep_progress_days']}]: "
).strip()
if days_input:
try:
config.config["resume"]["keep_progress_days"] = int(days_input)
@@ -467,7 +479,9 @@ def test_connections():
token = config.config["github"]["profiles"][p["name"]]["token"]
try:
response = requests.get(
"https://api.github.com/rate_limit", headers={"Authorization": f"token {token}"}, timeout=5
"https://api.github.com/rate_limit",
headers={"Authorization": f"token {token}"},
timeout=5,
)
if response.status_code == 200:
data = response.json()

View File

@@ -136,7 +136,9 @@ class ConfigEnhancer:
# Call Claude API
logger.info("📡 Calling Claude API for config analysis...")
response = self.client.messages.create(
model="claude-sonnet-4-20250514", max_tokens=8000, messages=[{"role": "user", "content": prompt}]
model="claude-sonnet-4-20250514",
max_tokens=8000,
messages=[{"role": "user", "content": prompt}],
)
# Parse response
@@ -157,7 +159,9 @@ class ConfigEnhancer:
for cf in config_files[:10]: # Limit to first 10 files
settings_summary = []
for setting in cf.get("settings", [])[:5]: # First 5 settings per file
settings_summary.append(f" - {setting['key']}: {setting['value']} ({setting['value_type']})")
settings_summary.append(
f" - {setting['key']}: {setting['value']} ({setting['value_type']})"
)
config_summary.append(f"""
File: {cf["relative_path"]} ({cf["config_type"]})
@@ -221,7 +225,9 @@ Focus on actionable insights that help developers understand and improve their c
original_result["ai_enhancements"] = enhancements
# Add enhancement flags to config files
file_enhancements = {e["file_path"]: e for e in enhancements.get("file_enhancements", [])}
file_enhancements = {
e["file_path"]: e for e in enhancements.get("file_enhancements", [])
}
for cf in original_result.get("config_files", []):
file_path = cf.get("relative_path", cf.get("file_path"))
if file_path in file_enhancements:
@@ -385,9 +391,14 @@ def main():
parser = argparse.ArgumentParser(description="AI-enhance configuration extraction results")
parser.add_argument("result_file", help="Path to config extraction JSON result file")
parser.add_argument(
"--mode", choices=["auto", "api", "local"], default="auto", help="Enhancement mode (default: auto)"
"--mode",
choices=["auto", "api", "local"],
default="auto",
help="Enhancement mode (default: auto)",
)
parser.add_argument(
"--output", help="Output file for enhanced results (default: <input>_enhanced.json)"
)
parser.add_argument("--output", help="Output file for enhanced results (default: <input>_enhanced.json)")
args = parser.parse_args()

View File

@@ -63,7 +63,9 @@ class ConfigFile:
file_path: str
relative_path: str
config_type: Literal["json", "yaml", "toml", "env", "ini", "python", "javascript", "dockerfile", "docker-compose"]
config_type: Literal[
"json", "yaml", "toml", "env", "ini", "python", "javascript", "dockerfile", "docker-compose"
]
purpose: str # Inferred purpose: database, api, logging, etc.
settings: list[ConfigSetting] = field(default_factory=list)
patterns: list[str] = field(default_factory=list)
@@ -156,11 +158,23 @@ class ConfigFileDetector:
CONFIG_PATTERNS = {
"json": {
"patterns": ["*.json", "package.json", "tsconfig.json", "jsconfig.json"],
"names": ["config.json", "settings.json", "app.json", ".eslintrc.json", ".prettierrc.json"],
"names": [
"config.json",
"settings.json",
"app.json",
".eslintrc.json",
".prettierrc.json",
],
},
"yaml": {
"patterns": ["*.yaml", "*.yml"],
"names": ["config.yml", "settings.yml", ".travis.yml", ".gitlab-ci.yml", "docker-compose.yml"],
"names": [
"config.yml",
"settings.yml",
".travis.yml",
".gitlab-ci.yml",
"docker-compose.yml",
],
},
"toml": {
"patterns": ["*.toml"],
@@ -498,7 +512,9 @@ class ConfigParser:
key = match.group(1)
value = match.group(3) if len(match.groups()) > 2 else match.group(2)
setting = ConfigSetting(key=key, value=value, value_type=self._infer_type(value))
setting = ConfigSetting(
key=key, value=value, value_type=self._infer_type(value)
)
config_file.settings.append(setting)
def _parse_dockerfile(self, config_file: ConfigFile):
@@ -514,7 +530,10 @@ class ConfigParser:
if len(parts) == 2:
key, value = parts
setting = ConfigSetting(
key=key.strip(), value=value.strip(), value_type="string", env_var=key.strip()
key=key.strip(),
value=value.strip(),
value_type="string",
env_var=key.strip(),
)
config_file.settings.append(setting)
@@ -527,7 +546,9 @@ class ConfigParser:
setting = ConfigSetting(key=key, value=value, value_type="string")
config_file.settings.append(setting)
def _extract_settings_from_dict(self, data: dict, config_file: ConfigFile, parent_path: list[str] = None):
def _extract_settings_from_dict(
self, data: dict, config_file: ConfigFile, parent_path: list[str] = None
):
"""Recursively extract settings from dictionary"""
if parent_path is None:
parent_path = []
@@ -636,7 +657,9 @@ class ConfigPatternDetector:
if matches >= min_match:
detected.append(pattern_name)
logger.debug(f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)")
logger.debug(
f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)"
)
return detected
@@ -649,7 +672,9 @@ class ConfigExtractor:
self.parser = ConfigParser()
self.pattern_detector = ConfigPatternDetector()
def extract_from_directory(self, directory: Path, max_files: int = 100) -> ConfigExtractionResult:
def extract_from_directory(
self, directory: Path, max_files: int = 100
) -> ConfigExtractionResult:
"""
Extract configuration patterns from directory.
@@ -695,7 +720,9 @@ class ConfigExtractor:
logger.error(error_msg)
result.errors.append(error_msg)
logger.info(f"Extracted {result.total_settings} settings from {result.total_files} config files")
logger.info(
f"Extracted {result.total_settings} settings from {result.total_files} config files"
)
logger.info(f"Detected patterns: {list(result.detected_patterns.keys())}")
return result
@@ -741,12 +768,18 @@ def main():
)
parser.add_argument("directory", type=Path, help="Directory to analyze")
parser.add_argument("--output", "-o", type=Path, help="Output JSON file")
parser.add_argument("--max-files", type=int, default=100, help="Maximum config files to process")
parser.add_argument(
"--enhance", action="store_true", help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)"
"--max-files", type=int, default=100, help="Maximum config files to process"
)
parser.add_argument(
"--enhance-local", action="store_true", help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)"
"--enhance",
action="store_true",
help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)",
)
parser.add_argument(
"--enhance-local",
action="store_true",
help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)",
)
parser.add_argument(
"--ai-mode",

View File

@@ -27,7 +27,11 @@ class ConfigManager:
DEFAULT_CONFIG = {
"version": "1.0",
"github": {"default_profile": None, "profiles": {}},
"rate_limit": {"default_timeout_minutes": 30, "auto_switch_profiles": True, "show_countdown": True},
"rate_limit": {
"default_timeout_minutes": 30,
"auto_switch_profiles": True,
"show_countdown": True,
},
"resume": {"auto_save_interval_seconds": 60, "keep_progress_days": 7},
"api_keys": {"anthropic": None, "google": None, "openai": None},
"first_run": {"completed": False, "version": "2.7.0"},
@@ -161,7 +165,9 @@ class ConfigManager:
return profiles
def get_github_token(self, profile_name: str | None = None, repo_url: str | None = None) -> str | None:
def get_github_token(
self, profile_name: str | None = None, repo_url: str | None = None
) -> str | None:
"""
Get GitHub token with smart fallback chain.
@@ -269,7 +275,11 @@ class ConfigManager:
2. Config file
"""
# Check environment first
env_map = {"anthropic": "ANTHROPIC_API_KEY", "google": "GOOGLE_API_KEY", "openai": "OPENAI_API_KEY"}
env_map = {
"anthropic": "ANTHROPIC_API_KEY",
"google": "GOOGLE_API_KEY",
"openai": "OPENAI_API_KEY",
}
env_var = env_map.get(provider)
if env_var:

View File

@@ -112,7 +112,9 @@ class ConfigValidator:
# Validate merge_mode (optional)
merge_mode = self.config.get("merge_mode", "rule-based")
if merge_mode not in self.VALID_MERGE_MODES:
raise ValueError(f"Invalid merge_mode: '{merge_mode}'. Must be one of {self.VALID_MERGE_MODES}")
raise ValueError(
f"Invalid merge_mode: '{merge_mode}'. Must be one of {self.VALID_MERGE_MODES}"
)
# Validate each source
for i, source in enumerate(sources):
@@ -130,7 +132,9 @@ class ConfigValidator:
source_type = source["type"]
if source_type not in self.VALID_SOURCE_TYPES:
raise ValueError(f"Source {index}: Invalid type '{source_type}'. Must be one of {self.VALID_SOURCE_TYPES}")
raise ValueError(
f"Source {index}: Invalid type '{source_type}'. Must be one of {self.VALID_SOURCE_TYPES}"
)
# Type-specific validation
if source_type == "documentation":
@@ -147,7 +151,9 @@ class ConfigValidator:
# Optional but recommended fields
if "selectors" not in source:
logger.warning(f"Source {index} (documentation): No 'selectors' specified, using defaults")
logger.warning(
f"Source {index} (documentation): No 'selectors' specified, using defaults"
)
if "max_pages" in source and not isinstance(source["max_pages"], int):
raise ValueError(f"Source {index} (documentation): 'max_pages' must be an integer")
@@ -178,8 +184,12 @@ class ConfigValidator:
raise ValueError(f"Source {index} (github): 'max_issues' must be an integer")
# Validate enable_codebase_analysis if specified (C3.5)
if "enable_codebase_analysis" in source and not isinstance(source["enable_codebase_analysis"], bool):
raise ValueError(f"Source {index} (github): 'enable_codebase_analysis' must be a boolean")
if "enable_codebase_analysis" in source and not isinstance(
source["enable_codebase_analysis"], bool
):
raise ValueError(
f"Source {index} (github): 'enable_codebase_analysis' must be a boolean"
)
# Validate ai_mode if specified (C3.5)
if "ai_mode" in source:
@@ -249,7 +259,10 @@ class ConfigValidator:
"description": self.config.get("description", "Documentation skill"),
"merge_mode": "rule-based",
"sources": [
{"type": "documentation", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}}
{
"type": "documentation",
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
}
],
}
return unified
@@ -261,7 +274,10 @@ class ConfigValidator:
"description": self.config.get("description", "GitHub repository skill"),
"merge_mode": "rule-based",
"sources": [
{"type": "github", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}}
{
"type": "github",
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
}
],
}
return unified
@@ -272,7 +288,12 @@ class ConfigValidator:
"name": self.config.get("name", "unnamed"),
"description": self.config.get("description", "PDF document skill"),
"merge_mode": "rule-based",
"sources": [{"type": "pdf", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}}],
"sources": [
{
"type": "pdf",
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
}
],
}
return unified
@@ -312,11 +333,13 @@ class ConfigValidator:
return False
has_docs_api = any(
s.get("type") == "documentation" and s.get("extract_api", True) for s in self.config["sources"]
s.get("type") == "documentation" and s.get("extract_api", True)
for s in self.config["sources"]
)
has_github_code = any(
s.get("type") == "github" and s.get("include_code", False) for s in self.config["sources"]
s.get("type") == "github" and s.get("include_code", False)
for s in self.config["sources"]
)
return has_docs_api and has_github_code

View File

@@ -451,7 +451,12 @@ class ConflictDetector:
}
# Count by type
for conflict_type in ["missing_in_docs", "missing_in_code", "signature_mismatch", "description_mismatch"]:
for conflict_type in [
"missing_in_docs",
"missing_in_code",
"signature_mismatch",
"description_mismatch",
]:
count = sum(1 for c in conflicts if c.type == conflict_type)
summary["by_type"][conflict_type] = count
@@ -470,7 +475,10 @@ class ConflictDetector:
conflicts: List of Conflict objects
output_path: Path to output JSON file
"""
data = {"conflicts": [asdict(c) for c in conflicts], "summary": self.generate_summary(conflicts)}
data = {
"conflicts": [asdict(c) for c in conflicts],
"summary": self.generate_summary(conflicts),
}
with open(output_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False)

View File

@@ -86,7 +86,9 @@ class DependencyAnalyzer:
def __init__(self):
"""Initialize dependency analyzer."""
if not NETWORKX_AVAILABLE:
raise ImportError("NetworkX is required for dependency analysis. Install with: pip install networkx")
raise ImportError(
"NetworkX is required for dependency analysis. Install with: pip install networkx"
)
self.graph = nx.DiGraph() # Directed graph for dependencies
self.file_dependencies: dict[str, list[DependencyInfo]] = {}
@@ -130,7 +132,9 @@ class DependencyAnalyzer:
# Create file node
imported_modules = [dep.imported_module for dep in deps]
self.file_nodes[file_path] = FileNode(file_path=file_path, language=language, dependencies=imported_modules)
self.file_nodes[file_path] = FileNode(
file_path=file_path, language=language, dependencies=imported_modules
)
return deps
@@ -594,7 +598,9 @@ class DependencyAnalyzer:
if target and target in self.file_nodes:
# Add edge from source to dependency
self.graph.add_edge(file_path, target, import_type=dep.import_type, line_number=dep.line_number)
self.graph.add_edge(
file_path, target, import_type=dep.import_type, line_number=dep.line_number
)
# Update imported_by lists
if target in self.file_nodes:
@@ -602,7 +608,9 @@ class DependencyAnalyzer:
return self.graph
def _resolve_import(self, source_file: str, imported_module: str, is_relative: bool) -> str | None:
def _resolve_import(
self, source_file: str, imported_module: str, is_relative: bool
) -> str | None:
"""
Resolve import statement to actual file path.
@@ -736,10 +744,14 @@ class DependencyAnalyzer:
"circular_dependencies": len(self.detect_cycles()),
"strongly_connected_components": len(self.get_strongly_connected_components()),
"avg_dependencies_per_file": (
self.graph.number_of_edges() / self.graph.number_of_nodes() if self.graph.number_of_nodes() > 0 else 0
self.graph.number_of_edges() / self.graph.number_of_nodes()
if self.graph.number_of_nodes() > 0
else 0
),
"files_with_no_dependencies": len(
[node for node in self.graph.nodes() if self.graph.out_degree(node) == 0]
),
"files_not_imported": len([node for node in self.graph.nodes() if self.graph.in_degree(node) == 0]),
"files_not_imported": len(
[node for node in self.graph.nodes() if self.graph.in_degree(node) == 0]
),
}

View File

@@ -65,7 +65,9 @@ def setup_logging(verbose: bool = False, quiet: bool = False) -> None:
logging.basicConfig(level=level, format="%(message)s", force=True)
def infer_description_from_docs(base_url: str, first_page_content: str | None = None, name: str = "") -> str:
def infer_description_from_docs(
base_url: str, first_page_content: str | None = None, name: str = ""
) -> str:
"""
Infer skill description from documentation metadata or first page content.
@@ -109,7 +111,13 @@ def infer_description_from_docs(base_url: str, first_page_content: str | None =
# Strategy 3: Extract first meaningful paragraph from main content
# Look for common documentation main content areas
main_content = None
for selector in ["article", "main", 'div[role="main"]', "div.content", "div.doc-content"]:
for selector in [
"article",
"main",
'div[role="main"]',
"div.content",
"div.doc-content",
]:
main_content = soup.select_one(selector)
if main_content:
break
@@ -120,7 +128,8 @@ def infer_description_from_docs(base_url: str, first_page_content: str | None =
text = p.get_text().strip()
# Skip empty, very short, or navigation-like paragraphs
if len(text) > 30 and not any(
skip in text.lower() for skip in ["table of contents", "on this page", "navigation"]
skip in text.lower()
for skip in ["table of contents", "on this page", "navigation"]
):
# Clean and format
if len(text) > 150:
@@ -160,7 +169,8 @@ class DocToSkillConverter:
skip_llms_txt_value = config.get("skip_llms_txt", False)
if not isinstance(skip_llms_txt_value, bool):
logger.warning(
"Invalid value for 'skip_llms_txt': %r (expected bool). Defaulting to False.", skip_llms_txt_value
"Invalid value for 'skip_llms_txt': %r (expected bool). Defaulting to False.",
skip_llms_txt_value,
)
self.skip_llms_txt = False
else:
@@ -381,7 +391,15 @@ class DocToSkillConverter:
if content.strip().startswith("<!DOCTYPE") or content.strip().startswith("<html"):
return self._extract_html_as_markdown(content, url)
page = {"url": url, "title": "", "content": "", "headings": [], "code_samples": [], "patterns": [], "links": []}
page = {
"url": url,
"title": "",
"content": "",
"headings": [],
"code_samples": [],
"patterns": [],
"links": [],
}
lines = content.split("\n")
@@ -397,7 +415,9 @@ class DocToSkillConverter:
if match:
level = len(match.group(1))
text = match.group(2).strip()
page["headings"].append({"level": f"h{level}", "text": text, "id": text.lower().replace(" ", "-")})
page["headings"].append(
{"level": f"h{level}", "text": text, "id": text.lower().replace(" ", "-")}
)
# Extract code blocks with language
code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
@@ -464,7 +484,15 @@ class DocToSkillConverter:
Falls back to <body> if no semantic content container found.
Language detection uses detect_language() method.
"""
page = {"url": url, "title": "", "content": "", "headings": [], "code_samples": [], "patterns": [], "links": []}
page = {
"url": url,
"title": "",
"content": "",
"headings": [],
"code_samples": [],
"patterns": [],
"links": [],
}
soup = BeautifulSoup(html_content, "html.parser")
@@ -515,7 +543,9 @@ class DocToSkillConverter:
return lang # Return string for backward compatibility
def extract_patterns(self, main: Any, code_samples: list[dict[str, Any]]) -> list[dict[str, str]]:
def extract_patterns(
self, main: Any, code_samples: list[dict[str, Any]]
) -> list[dict[str, str]]:
"""Extract common coding patterns (NEW FEATURE)"""
patterns = []
@@ -527,7 +557,10 @@ class DocToSkillConverter:
next_code = elem.find_next(["pre", "code"])
if next_code:
patterns.append(
{"description": self.clean_text(elem.get_text()), "code": next_code.get_text().strip()}
{
"description": self.clean_text(elem.get_text()),
"code": next_code.get_text().strip(),
}
)
return patterns[:5] # Limit to 5 most relevant patterns
@@ -615,7 +648,9 @@ class DocToSkillConverter:
logger.error(" ✗ Error scraping page: %s: %s", type(e).__name__, e)
logger.error(" URL: %s", url)
async def scrape_page_async(self, url: str, semaphore: asyncio.Semaphore, client: httpx.AsyncClient) -> None:
async def scrape_page_async(
self, url: str, semaphore: asyncio.Semaphore, client: httpx.AsyncClient
) -> None:
"""Scrape a single page asynchronously.
Args:
@@ -682,7 +717,9 @@ class DocToSkillConverter:
md_url = f"{url}/index.html.md"
md_urls.append(md_url)
logger.info(" ✓ Converted %d URLs to .md format (will validate during crawl)", len(md_urls))
logger.info(
" ✓ Converted %d URLs to .md format (will validate during crawl)", len(md_urls)
)
return md_urls
# ORIGINAL _convert_to_md_urls (with HEAD request validation):
@@ -744,7 +781,9 @@ class DocToSkillConverter:
variants = detector.detect_all()
if variants:
logger.info("\n🔍 Found %d total variant(s), downloading remaining...", len(variants))
logger.info(
"\n🔍 Found %d total variant(s), downloading remaining...", len(variants)
)
for variant_info in variants:
url = variant_info["url"]
variant = variant_info["variant"]
@@ -759,7 +798,9 @@ class DocToSkillConverter:
if extra_content:
extra_filename = extra_downloader.get_proper_filename()
extra_filepath = os.path.join(self.skill_dir, "references", extra_filename)
extra_filepath = os.path.join(
self.skill_dir, "references", extra_filename
)
with open(extra_filepath, "w", encoding="utf-8") as f:
f.write(extra_content)
logger.info("%s (%d chars)", extra_filename, len(extra_content))
@@ -783,7 +824,9 @@ class DocToSkillConverter:
if self.is_valid_url(url) and url not in self.visited_urls:
self.pending_urls.append(url)
logger.info(" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls))
logger.info(
" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls)
)
# Return False to trigger HTML scraping with the populated pending_urls
self.llms_txt_detected = True
@@ -824,7 +867,11 @@ class DocToSkillConverter:
if content:
filename = downloader.get_proper_filename()
downloaded[variant] = {"content": content, "filename": filename, "size": len(content)}
downloaded[variant] = {
"content": content,
"filename": filename,
"size": len(content),
}
logger.info("%s (%d chars)", filename, len(content))
if not downloaded:
@@ -902,7 +949,9 @@ class DocToSkillConverter:
if not self.dry_run and not self.skip_llms_txt:
llms_result = self._try_llms_txt()
if llms_result:
logger.info("\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant)
logger.info(
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
)
self.save_summary()
return
@@ -953,7 +1002,9 @@ class DocToSkillConverter:
response = requests.get(url, headers=headers, timeout=10)
soup = BeautifulSoup(response.content, "html.parser")
main_selector = self.config.get("selectors", {}).get("main_content", 'div[role="main"]')
main_selector = self.config.get("selectors", {}).get(
"main_content", 'div[role="main"]'
)
main = soup.select_one(main_selector)
if main:
@@ -968,7 +1019,10 @@ class DocToSkillConverter:
self.scrape_page(url)
self.pages_scraped += 1
if self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0:
if (
self.checkpoint_enabled
and self.pages_scraped % self.checkpoint_interval == 0
):
self.save_checkpoint()
if len(self.visited_urls) % 10 == 0:
@@ -1019,7 +1073,10 @@ class DocToSkillConverter:
with self.lock:
self.pages_scraped += 1
if self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0:
if (
self.checkpoint_enabled
and self.pages_scraped % self.checkpoint_interval == 0
):
self.save_checkpoint()
if self.pages_scraped % 10 == 0:
@@ -1062,7 +1119,9 @@ class DocToSkillConverter:
if not self.dry_run and not self.skip_llms_txt:
llms_result = self._try_llms_txt()
if llms_result:
logger.info("\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant)
logger.info(
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
)
self.save_summary()
return
@@ -1097,7 +1156,9 @@ class DocToSkillConverter:
semaphore = asyncio.Semaphore(self.workers)
# Create shared HTTP client with connection pooling
async with httpx.AsyncClient(timeout=30.0, limits=httpx.Limits(max_connections=self.workers * 2)) as client:
async with httpx.AsyncClient(
timeout=30.0, limits=httpx.Limits(max_connections=self.workers * 2)
) as client:
tasks = []
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
@@ -1120,7 +1181,9 @@ class DocToSkillConverter:
if self.dry_run:
logger.info(" [Preview] %s", url)
else:
task = asyncio.create_task(self.scrape_page_async(url, semaphore, client))
task = asyncio.create_task(
self.scrape_page_async(url, semaphore, client)
)
tasks.append(task)
# Wait for batch to complete before continuing
@@ -1145,7 +1208,9 @@ class DocToSkillConverter:
if self.dry_run:
logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
if len(self.visited_urls) >= preview_limit:
logger.info(" (showing first %d, actual scraping may find more)", int(preview_limit))
logger.info(
" (showing first %d, actual scraping may find more)", int(preview_limit)
)
logger.info("\n💡 To actually scrape, run without --dry-run")
else:
logger.info("\n✅ Scraped %d pages (async mode)", len(self.visited_urls))
@@ -1178,8 +1243,12 @@ class DocToSkillConverter:
with open(json_file, encoding="utf-8") as f:
pages.append(json.load(f))
except Exception as e:
logger.error("⚠️ Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e)
logger.error(" Suggestion: File may be corrupted, consider re-scraping with --fresh")
logger.error(
"⚠️ Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e
)
logger.error(
" Suggestion: File may be corrupted, consider re-scraping with --fresh"
)
return pages
@@ -1197,7 +1266,9 @@ class DocToSkillConverter:
for page in pages:
url = page["url"].lower()
title = page["title"].lower()
content = page.get("content", "").lower()[:CONTENT_PREVIEW_LENGTH] # Check first N chars for categorization
content = page.get("content", "").lower()[
:CONTENT_PREVIEW_LENGTH
] # Check first N chars for categorization
categorized = False
@@ -1232,7 +1303,9 @@ class DocToSkillConverter:
for page in pages:
path = urlparse(page["url"]).path
segments = [s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]]
segments = [
s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]
]
for seg in segments:
url_segments[seg] += 1
@@ -1246,10 +1319,14 @@ class DocToSkillConverter:
categories[seg] = [seg]
# Add common defaults
if "tutorial" not in categories and any("tutorial" in url for url in [p["url"] for p in pages]):
if "tutorial" not in categories and any(
"tutorial" in url for url in [p["url"] for p in pages]
):
categories["tutorials"] = ["tutorial", "guide", "getting-started"]
if "api" not in categories and any("api" in url or "reference" in url for url in [p["url"] for p in pages]):
if "api" not in categories and any(
"api" in url or "reference" in url for url in [p["url"] for p in pages]
):
categories["api"] = ["api", "reference", "class"]
return categories
@@ -1551,12 +1628,16 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
# Validate name (alphanumeric, hyphens, underscores only)
if "name" in config:
if not re.match(r"^[a-zA-Z0-9_-]+$", config["name"]):
errors.append(f"Invalid name: '{config['name']}' (use only letters, numbers, hyphens, underscores)")
errors.append(
f"Invalid name: '{config['name']}' (use only letters, numbers, hyphens, underscores)"
)
# Validate base_url
if "base_url" in config:
if not config["base_url"].startswith(("http://", "https://")):
errors.append(f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)")
errors.append(
f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)"
)
# Validate selectors structure
if "selectors" in config:
@@ -1596,7 +1677,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
if rate < 0:
errors.append(f"'rate_limit' must be non-negative (got {rate})")
elif rate > 10:
warnings.append(f"'rate_limit' is very high ({rate}s) - this may slow down scraping significantly")
warnings.append(
f"'rate_limit' is very high ({rate}s) - this may slow down scraping significantly"
)
except (ValueError, TypeError):
errors.append(f"'rate_limit' must be a number (got {config['rate_limit']})")
@@ -1606,19 +1689,29 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
# Allow None for unlimited
if max_p_value is None:
warnings.append("'max_pages' is None (unlimited) - this will scrape ALL pages. Use with caution!")
warnings.append(
"'max_pages' is None (unlimited) - this will scrape ALL pages. Use with caution!"
)
else:
try:
max_p = int(max_p_value)
# Allow -1 for unlimited
if max_p == -1:
warnings.append("'max_pages' is -1 (unlimited) - this will scrape ALL pages. Use with caution!")
warnings.append(
"'max_pages' is -1 (unlimited) - this will scrape ALL pages. Use with caution!"
)
elif max_p < 1:
errors.append(f"'max_pages' must be at least 1 or -1 for unlimited (got {max_p})")
errors.append(
f"'max_pages' must be at least 1 or -1 for unlimited (got {max_p})"
)
elif max_p > MAX_PAGES_WARNING_THRESHOLD:
warnings.append(f"'max_pages' is very high ({max_p}) - scraping may take a very long time")
warnings.append(
f"'max_pages' is very high ({max_p}) - scraping may take a very long time"
)
except (ValueError, TypeError):
errors.append(f"'max_pages' must be an integer, -1, or null (got {config['max_pages']})")
errors.append(
f"'max_pages' must be an integer, -1, or null (got {config['max_pages']})"
)
# Validate start_urls if present
if "start_urls" in config:
@@ -1627,7 +1720,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
else:
for url in config["start_urls"]:
if not url.startswith(("http://", "https://")):
errors.append(f"Invalid start_url: '{url}' (must start with http:// or https://)")
errors.append(
f"Invalid start_url: '{url}' (must start with http:// or https://)"
)
return errors, warnings
@@ -1716,7 +1811,9 @@ def interactive_config() -> dict[str, Any]:
# Selectors
logger.info("\nCSS Selectors (press Enter for defaults):")
selectors = {}
selectors["main_content"] = input(" Main content [div[role='main']]: ").strip() or "div[role='main']"
selectors["main_content"] = (
input(" Main content [div[role='main']]: ").strip() or "div[role='main']"
)
selectors["title"] = input(" Title [title]: ").strip() or "title"
selectors["code_blocks"] = input(" Code blocks [pre code]: ").strip() or "pre code"
config["selectors"] = selectors
@@ -1782,15 +1879,27 @@ def setup_argument_parser() -> argparse.ArgumentParser:
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--interactive", "-i", action="store_true", help="Interactive configuration mode")
parser.add_argument("--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)")
parser.add_argument(
"--interactive", "-i", action="store_true", help="Interactive configuration mode"
)
parser.add_argument(
"--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)"
)
parser.add_argument("--name", type=str, help="Skill name")
parser.add_argument("--url", type=str, help="Base documentation URL")
parser.add_argument("--description", "-d", type=str, help="Skill description")
parser.add_argument("--skip-scrape", action="store_true", help="Skip scraping, use existing data")
parser.add_argument("--dry-run", action="store_true", help="Preview what will be scraped without actually scraping")
parser.add_argument(
"--enhance", action="store_true", help="Enhance SKILL.md using Claude API after building (requires API key)"
"--skip-scrape", action="store_true", help="Skip scraping, use existing data"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Preview what will be scraped without actually scraping",
)
parser.add_argument(
"--enhance",
action="store_true",
help="Enhance SKILL.md using Claude API after building (requires API key)",
)
parser.add_argument(
"--enhance-local",
@@ -1802,8 +1911,14 @@ def setup_argument_parser() -> argparse.ArgumentParser:
action="store_true",
help="Open terminal window for enhancement (use with --enhance-local)",
)
parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)")
parser.add_argument("--resume", action="store_true", help="Resume from last checkpoint (for interrupted scrapes)")
parser.add_argument(
"--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
)
parser.add_argument(
"--resume",
action="store_true",
help="Resume from last checkpoint (for interrupted scrapes)",
)
parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh")
parser.add_argument(
"--rate-limit",
@@ -1826,10 +1941,16 @@ def setup_argument_parser() -> argparse.ArgumentParser:
help="Enable async mode for better parallel performance (2-3x faster than threads)",
)
parser.add_argument(
"--no-rate-limit", action="store_true", help="Disable rate limiting completely (same as --rate-limit 0)"
"--no-rate-limit",
action="store_true",
help="Disable rate limiting completely (same as --rate-limit 0)",
)
parser.add_argument(
"--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)"
)
parser.add_argument(
"--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)"
)
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)")
parser.add_argument("--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)")
return parser
@@ -1866,7 +1987,11 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
"name": args.name,
"description": args.description or f"Use when working with {args.name}",
"base_url": args.url,
"selectors": {"main_content": "div[role='main']", "title": "title", "code_blocks": "pre code"},
"selectors": {
"main_content": "div[role='main']",
"title": "title",
"code_blocks": "pre code",
},
"url_patterns": {"include": [], "exclude": []},
"rate_limit": DEFAULT_RATE_LIMIT,
"max_pages": DEFAULT_MAX_PAGES,
@@ -1903,12 +2028,16 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
if config.get("workers", 1) > 1:
logger.info("⚡ Async mode enabled (2-3x faster than threads)")
else:
logger.warning("⚠️ Async mode enabled but workers=1. Consider using --workers 4 for better performance")
logger.warning(
"⚠️ Async mode enabled but workers=1. Consider using --workers 4 for better performance"
)
return config
def execute_scraping_and_building(config: dict[str, Any], args: argparse.Namespace) -> Optional["DocToSkillConverter"]:
def execute_scraping_and_building(
config: dict[str, Any], args: argparse.Namespace
) -> Optional["DocToSkillConverter"]:
"""Execute the scraping and skill building process.
Handles dry run mode, existing data checks, scraping with checkpoints,
@@ -1995,7 +2124,10 @@ def execute_scraping_and_building(config: dict[str, Any], args: argparse.Namespa
if converter.checkpoint_enabled:
converter.save_checkpoint()
logger.info("💾 Progress saved to checkpoint")
logger.info(" Resume with: --config %s --resume", args.config if args.config else "config.json")
logger.info(
" Resume with: --config %s --resume",
args.config if args.config else "config.json",
)
response = input("Continue with skill building? (y/n): ").strip().lower()
if response != "y":
return None
@@ -2086,7 +2218,9 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non
logger.info(" or re-run with: --enhance-local")
logger.info(" API-based: skill-seekers-enhance-api output/%s/", config["name"])
logger.info(" or re-run with: --enhance")
logger.info("\n💡 Tip: Use --interactive-enhancement with --enhance-local to open terminal window")
logger.info(
"\n💡 Tip: Use --interactive-enhancement with --enhance-local to open terminal window"
)
def main() -> None:

View File

@@ -41,7 +41,9 @@ class SkillEnhancer:
self.skill_md_path = self.skill_dir / "SKILL.md"
# Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_AUTH_TOKEN")
self.api_key = (
api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_AUTH_TOKEN")
)
if not self.api_key:
raise ValueError(
"No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN "
@@ -174,7 +176,9 @@ This skill combines knowledge from {len(sources_found)} source type(s):
if repo_id:
prompt += f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n"
else:
prompt += f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
prompt += (
f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
)
prompt += f"```markdown\n{content}\n```\n"
prompt += """
@@ -295,7 +299,9 @@ Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
# Read reference files
print("📖 Reading reference documentation...")
references = read_reference_files(self.skill_dir, max_chars=API_CONTENT_LIMIT, preview_limit=API_PREVIEW_LIMIT)
references = read_reference_files(
self.skill_dir, max_chars=API_CONTENT_LIMIT, preview_limit=API_PREVIEW_LIMIT
)
if not references:
print("❌ No reference files found to analyze")
@@ -334,7 +340,9 @@ Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
print("\n✅ Enhancement complete!")
print("\nNext steps:")
print(f" 1. Review: {self.skill_md_path}")
print(f" 2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}")
print(
f" 2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}"
)
print(" 3. Package your skill:")
print(f" skill-seekers package {self.skill_dir}/")
@@ -367,15 +375,21 @@ Examples:
""",
)
parser.add_argument("skill_dir", type=str, help="Path to skill directory (e.g., output/steam-inventory/)")
parser.add_argument("--api-key", type=str, help="Platform API key (or set environment variable)")
parser.add_argument(
"skill_dir", type=str, help="Path to skill directory (e.g., output/steam-inventory/)"
)
parser.add_argument(
"--api-key", type=str, help="Platform API key (or set environment variable)"
)
parser.add_argument(
"--target",
choices=["claude", "gemini", "openai"],
default="claude",
help="Target LLM platform (default: claude)",
)
parser.add_argument("--dry-run", action="store_true", help="Show what would be done without calling API")
parser.add_argument(
"--dry-run", action="store_true", help="Show what would be done without calling API"
)
args = parser.parse_args()
@@ -447,7 +461,9 @@ Examples:
print("\n✅ Enhancement complete!")
print("\nNext steps:")
print(f" 1. Review: {Path(skill_dir) / 'SKILL.md'}")
print(f" 2. If you don't like it, restore backup: {Path(skill_dir) / 'SKILL.md.backup'}")
print(
f" 2. If you don't like it, restore backup: {Path(skill_dir) / 'SKILL.md.backup'}"
)
print(" 3. Package your skill:")
print(f" skill-seekers package {skill_dir}/ --target {args.target}")

View File

@@ -216,7 +216,9 @@ class LocalSkillEnhancer:
if use_summarization or total_ref_size > 30000:
if not use_summarization:
print(f" ⚠️ Large skill detected ({total_ref_size:,} chars)")
print(f" 📊 Applying smart summarization (target: {int(summarization_ratio * 100)}% of original)")
print(
f" 📊 Applying smart summarization (target: {int(summarization_ratio * 100)}% of original)"
)
print()
# Summarize each reference
@@ -307,7 +309,9 @@ REFERENCE DOCUMENTATION:
if repo_id:
prompt += f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n"
else:
prompt += f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
prompt += (
f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
)
prompt += f"{content}\n"
prompt += f"""
@@ -528,7 +532,9 @@ After writing, the file SKILL.md should:
return False
# Save prompt to temp file
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
with tempfile.NamedTemporaryFile(
mode="w", suffix=".txt", delete=False, encoding="utf-8"
) as f:
prompt_file = f.name
f.write(prompt)
@@ -605,7 +611,9 @@ rm {prompt_file}
print(f" - Prompt file: {prompt_file}")
print(f" - Skill directory: {self.skill_dir.absolute()}")
print(f" - SKILL.md will be saved to: {self.skill_md_path.absolute()}")
print(f" - Original backed up to: {self.skill_md_path.with_suffix('.md.backup').absolute()}")
print(
f" - Original backed up to: {self.skill_md_path.with_suffix('.md.backup').absolute()}"
)
print()
print("⏳ Wait for Claude Code to finish in the other terminal...")
print(" (Usually takes 30-60 seconds)")
@@ -782,7 +790,9 @@ rm {prompt_file}
return
# Save prompt to temp file
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
with tempfile.NamedTemporaryFile(
mode="w", suffix=".txt", delete=False, encoding="utf-8"
) as f:
prompt_file = f.name
f.write(prompt)
@@ -791,7 +801,9 @@ rm {prompt_file}
# Run enhancement
if headless:
# Run headless (subprocess.run - blocking in thread)
result = subprocess.run(["claude", prompt_file], capture_output=True, text=True, timeout=timeout)
result = subprocess.run(
["claude", prompt_file], capture_output=True, text=True, timeout=timeout
)
# Clean up
try:
@@ -800,9 +812,13 @@ rm {prompt_file}
pass
if result.returncode == 0:
self.write_status("completed", "Enhancement completed successfully!", progress=1.0)
self.write_status(
"completed", "Enhancement completed successfully!", progress=1.0
)
else:
self.write_status("failed", error=f"Claude returned error: {result.returncode}")
self.write_status(
"failed", error=f"Claude returned error: {result.returncode}"
)
else:
# Terminal mode in background doesn't make sense
self.write_status("failed", error="Terminal mode not supported in background")
@@ -951,7 +967,10 @@ except Exception as e:
# Normal mode: Log to file
with open(log_file, "w") as log:
subprocess.Popen(
["nohup", "python3", str(daemon_script_path)], stdout=log, stderr=log, start_new_session=True
["nohup", "python3", str(daemon_script_path)],
stdout=log,
stderr=log,
start_new_session=True,
)
# Give daemon time to start
@@ -1033,10 +1052,14 @@ Force Mode (Default ON):
)
parser.add_argument(
"--background", action="store_true", help="Run in background and return immediately (non-blocking)"
"--background",
action="store_true",
help="Run in background and return immediately (non-blocking)",
)
parser.add_argument("--daemon", action="store_true", help="Run as persistent daemon process (fully detached)")
parser.add_argument(
"--daemon", action="store_true", help="Run as persistent daemon process (fully detached)"
)
parser.add_argument(
"--no-force",
@@ -1045,7 +1068,10 @@ Force Mode (Default ON):
)
parser.add_argument(
"--timeout", type=int, default=600, help="Timeout in seconds for headless mode (default: 600 = 10 minutes)"
"--timeout",
type=int,
default=600,
help="Timeout in seconds for headless mode (default: 600 = 10 minutes)",
)
args = parser.parse_args()
@@ -1053,7 +1079,9 @@ Force Mode (Default ON):
# Validate mutually exclusive options
mode_count = sum([args.interactive_enhancement, args.background, args.daemon])
if mode_count > 1:
print("❌ Error: --interactive-enhancement, --background, and --daemon are mutually exclusive")
print(
"❌ Error: --interactive-enhancement, --background, and --daemon are mutually exclusive"
)
print(" Choose only one mode")
sys.exit(1)
@@ -1061,7 +1089,9 @@ Force Mode (Default ON):
# Force mode is ON by default, use --no-force to disable
enhancer = LocalSkillEnhancer(args.skill_directory, force=not args.no_force)
headless = not args.interactive_enhancement # Invert: default is headless
success = enhancer.run(headless=headless, timeout=args.timeout, background=args.background, daemon=args.daemon)
success = enhancer.run(
headless=headless, timeout=args.timeout, background=args.background, daemon=args.daemon
)
sys.exit(0 if success else 1)

View File

@@ -149,12 +149,17 @@ Examples:
parser.add_argument("skill_directory", help="Path to skill directory (e.g., output/react/)")
parser.add_argument(
"--watch", "-w", action="store_true", help="Watch status in real-time (updates every 2 seconds)"
"--watch",
"-w",
action="store_true",
help="Watch status in real-time (updates every 2 seconds)",
)
parser.add_argument("--json", action="store_true", help="Output raw JSON (for scripting)")
parser.add_argument("--interval", type=int, default=2, help="Watch update interval in seconds (default: 2)")
parser.add_argument(
"--interval", type=int, default=2, help="Watch update interval in seconds (default: 2)"
)
args = parser.parse_args()

View File

@@ -17,7 +17,11 @@ from bs4 import BeautifulSoup
# Add parent directory to path for imports when run as script
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from skill_seekers.cli.constants import DEFAULT_MAX_DISCOVERY, DEFAULT_RATE_LIMIT, DISCOVERY_THRESHOLD
from skill_seekers.cli.constants import (
DEFAULT_MAX_DISCOVERY,
DEFAULT_RATE_LIMIT,
DISCOVERY_THRESHOLD,
)
def estimate_pages(config, max_discovery=DEFAULT_MAX_DISCOVERY, timeout=30):
@@ -306,7 +310,12 @@ def list_all_configs():
description = description[:57] + "..."
by_category[category].append(
{"file": config_file.name, "path": str(rel_path), "name": name, "description": description}
{
"file": config_file.name,
"path": str(rel_path),
"name": name,
"description": description,
}
)
except Exception as e:
# If we can't parse the config, just use the filename
@@ -366,7 +375,11 @@ Examples:
)
parser.add_argument("config", nargs="?", help="Path to config JSON file")
parser.add_argument("--all", action="store_true", help="List all available configs from api/configs_repo/official/")
parser.add_argument(
"--all",
action="store_true",
help="List all available configs from api/configs_repo/official/",
)
parser.add_argument(
"--max-discovery",
"-m",
@@ -380,7 +393,13 @@ Examples:
action="store_true",
help="Remove discovery limit - discover all pages (same as --max-discovery -1)",
)
parser.add_argument("--timeout", "-t", type=int, default=30, help="HTTP request timeout in seconds (default: 30)")
parser.add_argument(
"--timeout",
"-t",
type=int,
default=30,
help="HTTP request timeout in seconds (default: 30)",
)
args = parser.parse_args()

View File

@@ -35,7 +35,10 @@ class RouterGenerator:
"""Generates router skills that direct to specialized sub-skills with GitHub integration"""
def __init__(
self, config_paths: list[str], router_name: str = None, github_streams: Optional["ThreeStreamData"] = None
self,
config_paths: list[str],
router_name: str = None,
github_streams: Optional["ThreeStreamData"] = None,
):
"""
Initialize router generator with optional GitHub streams.
@@ -124,7 +127,10 @@ class RouterGenerator:
label = label_info["label"].lower()
# Check if label relates to any skill keyword
if any(keyword.lower() in label or label in keyword.lower() for keyword in skill_keywords):
if any(
keyword.lower() in label or label in keyword.lower()
for keyword in skill_keywords
):
# Add twice for 2x weight
keywords.append(label)
keywords.append(label)
@@ -217,9 +223,13 @@ class RouterGenerator:
if unique_topics:
topics_str = ", ".join(unique_topics)
description = f"{self.router_name.title()} framework. Use when working with: {topics_str}"
description = (
f"{self.router_name.title()} framework. Use when working with: {topics_str}"
)
else:
description = f"Use when working with {self.router_name.title()} development and programming"
description = (
f"Use when working with {self.router_name.title()} development and programming"
)
# Truncate to 200 chars for performance (agentskills.io recommendation)
if len(description) > 200:
@@ -357,7 +367,9 @@ compatibility: {compatibility}
topic = self._extract_topic_from_skill(first_skill)
keyword = first_keywords[0] if first_keywords else topic
examples.append(f'**Q:** "How do I implement {keyword}?"\n**A:** Activates {first_skill} skill')
examples.append(
f'**Q:** "How do I implement {keyword}?"\n**A:** Activates {first_skill} skill'
)
# Example 2: Different skill (second sub-skill if available)
if len(skill_names) >= 2:
@@ -434,7 +446,9 @@ compatibility: {compatibility}
f"**A:** Activates {skill_name} skill"
)
return "\n\n".join(examples) if examples else self._generate_dynamic_examples(routing_keywords)
return (
"\n\n".join(examples) if examples else self._generate_dynamic_examples(routing_keywords)
)
def _convert_issue_to_question(self, issue_title: str) -> str:
"""
@@ -492,7 +506,9 @@ compatibility: {compatibility}
patterns = []
# Top 5 closed issues with most engagement (comments indicate usefulness)
top_solutions = sorted(known_solutions, key=lambda x: x.get("comments", 0), reverse=True)[:5]
top_solutions = sorted(known_solutions, key=lambda x: x.get("comments", 0), reverse=True)[
:5
]
for issue in top_solutions:
title = issue.get("title", "")
@@ -1000,8 +1016,12 @@ GitHub issues related to this topic:
md = "# Common GitHub Issues\n\n"
md += "Top issues reported by the community:\n\n"
common_problems = self.github_issues.get("common_problems", [])[:10] if self.github_issues else []
known_solutions = self.github_issues.get("known_solutions", [])[:10] if self.github_issues else []
common_problems = (
self.github_issues.get("common_problems", [])[:10] if self.github_issues else []
)
known_solutions = (
self.github_issues.get("known_solutions", [])[:10] if self.github_issues else []
)
if common_problems:
md += "## Open Issues (Common Problems)\n\n"

View File

@@ -77,7 +77,11 @@ class GitHubThreeStreamFetcher:
"""
def __init__(
self, repo_url: str, github_token: str | None = None, interactive: bool = True, profile_name: str | None = None
self,
repo_url: str,
github_token: str | None = None,
interactive: bool = True,
profile_name: str | None = None,
):
"""
Initialize fetcher.
@@ -412,7 +416,9 @@ class GitHubThreeStreamFetcher:
continue
# Skip hidden files (but allow docs in docs/ directories)
is_in_docs_dir = any(pattern in str(file_path) for pattern in ["docs/", "doc/", "documentation/"])
is_in_docs_dir = any(
pattern in str(file_path) for pattern in ["docs/", "doc/", "documentation/"]
)
if any(part.startswith(".") for part in file_path.parts):
if not is_in_docs_dir:
continue
@@ -495,9 +501,15 @@ class GitHubThreeStreamFetcher:
label_counts = Counter(all_labels)
return {
"common_problems": sorted(common_problems, key=lambda x: x["comments"], reverse=True)[:10],
"known_solutions": sorted(known_solutions, key=lambda x: x["comments"], reverse=True)[:10],
"top_labels": [{"label": label, "count": count} for label, count in label_counts.most_common(10)],
"common_problems": sorted(common_problems, key=lambda x: x["comments"], reverse=True)[
:10
],
"known_solutions": sorted(known_solutions, key=lambda x: x["comments"], reverse=True)[
:10
],
"top_labels": [
{"label": label, "count": count} for label, count in label_counts.most_common(10)
],
}
def read_file(self, file_path: Path) -> str | None:

View File

@@ -178,7 +178,9 @@ class GitHubScraper:
self.repo_name = config["repo"]
self.name = config.get("name", self.repo_name.split("/")[-1])
# Set initial description (will be improved after README extraction if not in config)
self.description = config.get("description", f"Use when working with {self.repo_name.split('/')[-1]}")
self.description = config.get(
"description", f"Use when working with {self.repo_name.split('/')[-1]}"
)
# Local repository path (optional - enables unlimited analysis)
self.local_repo_path = local_repo_path or config.get("local_repo_path")
@@ -192,14 +194,18 @@ class GitHubScraper:
# Option 1: Replace mode - Use only specified exclusions
if "exclude_dirs" in config:
self.excluded_dirs = set(config["exclude_dirs"])
logger.warning(f"Using custom directory exclusions ({len(self.excluded_dirs)} dirs) - defaults overridden")
logger.warning(
f"Using custom directory exclusions ({len(self.excluded_dirs)} dirs) - defaults overridden"
)
logger.debug(f"Custom exclusions: {sorted(self.excluded_dirs)}")
# Option 2: Extend mode - Add to default exclusions
elif "exclude_dirs_additional" in config:
additional = set(config["exclude_dirs_additional"])
self.excluded_dirs = self.excluded_dirs.union(additional)
logger.info(f"Added {len(additional)} custom directory exclusions (total: {len(self.excluded_dirs)})")
logger.info(
f"Added {len(additional)} custom directory exclusions (total: {len(self.excluded_dirs)})"
)
logger.debug(f"Additional exclusions: {sorted(additional)}")
# Load .gitignore for additional exclusions (C2.1)
@@ -218,7 +224,9 @@ class GitHubScraper:
self.include_changelog = config.get("include_changelog", True)
self.include_releases = config.get("include_releases", True)
self.include_code = config.get("include_code", False)
self.code_analysis_depth = config.get("code_analysis_depth", "surface") # 'surface', 'deep', 'full'
self.code_analysis_depth = config.get(
"code_analysis_depth", "surface"
) # 'surface', 'deep', 'full'
self.file_patterns = config.get("file_patterns", [])
# Initialize code analyzer if deep analysis requested
@@ -261,7 +269,9 @@ class GitHubScraper:
logger.warning("Using GitHub token from config file (less secure)")
return token
logger.warning("No GitHub token provided - using unauthenticated access (lower rate limits)")
logger.warning(
"No GitHub token provided - using unauthenticated access (lower rate limits)"
)
return None
def scrape(self) -> dict[str, Any]:
@@ -334,7 +344,9 @@ class GitHubScraper:
"topics": self.repo.get_topics(),
}
logger.info(f"Repository fetched: {self.repo.full_name} ({self.repo.stargazers_count} stars)")
logger.info(
f"Repository fetched: {self.repo.full_name} ({self.repo.stargazers_count} stars)"
)
except GithubException as e:
if e.status == 404:
@@ -378,7 +390,9 @@ class GitHubScraper:
file_size = getattr(content, "size", 0)
if download_url:
logger.info(f"File {file_path} is large ({file_size:,} bytes), downloading via URL...")
logger.info(
f"File {file_path} is large ({file_size:,} bytes), downloading via URL..."
)
try:
import requests
@@ -389,7 +403,9 @@ class GitHubScraper:
logger.warning(f"Failed to download {file_path} from {download_url}: {e}")
return None
else:
logger.warning(f"File {file_path} has no download URL (encoding={content.encoding})")
logger.warning(
f"File {file_path} has no download URL (encoding={content.encoding})"
)
return None
# Handle regular files - decode content
@@ -419,7 +435,14 @@ class GitHubScraper:
logger.info("Extracting README...")
# Try common README locations
readme_files = ["README.md", "README.rst", "README.txt", "README", "docs/README.md", ".github/README.md"]
readme_files = [
"README.md",
"README.rst",
"README.txt",
"README",
"docs/README.md",
".github/README.md",
]
for readme_path in readme_files:
readme_content = self._get_file_content(readme_path)
@@ -429,7 +452,9 @@ class GitHubScraper:
# Update description if not explicitly set in config
if "description" not in self.config:
smart_description = extract_description_from_readme(self.extracted_data["readme"], self.repo_name)
smart_description = extract_description_from_readme(
self.extracted_data["readme"], self.repo_name
)
self.description = smart_description
logger.debug(f"Generated description: {self.description}")
@@ -465,7 +490,9 @@ class GitHubScraper:
self.extracted_data["languages"] = {
lang: {
"bytes": bytes_count,
"percentage": round((bytes_count / total_bytes) * 100, 2) if total_bytes > 0 else 0,
"percentage": round((bytes_count / total_bytes) * 100, 2)
if total_bytes > 0
else 0,
}
for lang, bytes_count in languages.items()
}
@@ -502,7 +529,9 @@ class GitHubScraper:
# For directories, we need to check both with and without trailing slash
# as .gitignore patterns can match either way
dir_path_with_slash = dir_path if dir_path.endswith("/") else dir_path + "/"
if self.gitignore_spec.match_file(dir_path) or self.gitignore_spec.match_file(dir_path_with_slash):
if self.gitignore_spec.match_file(dir_path) or self.gitignore_spec.match_file(
dir_path_with_slash
):
logger.debug(f"Directory excluded by .gitignore: {dir_path}")
return True
@@ -555,7 +584,9 @@ class GitHubScraper:
return
# Log exclusions for debugging
logger.info(f"Directory exclusions ({len(self.excluded_dirs)} total): {sorted(list(self.excluded_dirs)[:10])}")
logger.info(
f"Directory exclusions ({len(self.excluded_dirs)} total): {sorted(list(self.excluded_dirs)[:10])}"
)
file_tree = []
excluded_count = 0
@@ -594,7 +625,9 @@ class GitHubScraper:
file_tree.append({"path": file_path, "type": "file", "size": file_size})
self.extracted_data["file_tree"] = file_tree
logger.info(f"File tree built (local mode): {len(file_tree)} items ({excluded_count} directories excluded)")
logger.info(
f"File tree built (local mode): {len(file_tree)} items ({excluded_count} directories excluded)"
)
def _extract_file_tree_github(self):
"""Extract file tree from GitHub API (rate-limited)."""
@@ -695,10 +728,16 @@ class GitHubScraper:
file_content = self.repo.get_contents(file_path)
content = file_content.decoded_content.decode("utf-8")
analysis_result = self.code_analyzer.analyze_file(file_path, content, primary_language)
analysis_result = self.code_analyzer.analyze_file(
file_path, content, primary_language
)
if analysis_result and (analysis_result.get("classes") or analysis_result.get("functions")):
analyzed_files.append({"file": file_path, "language": primary_language, **analysis_result})
if analysis_result and (
analysis_result.get("classes") or analysis_result.get("functions")
):
analyzed_files.append(
{"file": file_path, "language": primary_language, **analysis_result}
)
logger.debug(
f"Analyzed {file_path}: "
@@ -805,7 +844,9 @@ class GitHubScraper:
"draft": release.draft,
"prerelease": release.prerelease,
"created_at": release.created_at.isoformat() if release.created_at else None,
"published_at": release.published_at.isoformat() if release.published_at else None,
"published_at": release.published_at.isoformat()
if release.published_at
else None,
"url": release.html_url,
"tarball_url": release.tarball_url,
"zipball_url": release.zipball_url,
@@ -973,13 +1014,21 @@ Use this skill when you need to:
if has_c3_data:
skill_content += "\n### Codebase Analysis References\n\n"
if c3_data.get("patterns"):
skill_content += "- `references/codebase_analysis/patterns/` - Design patterns detected\n"
skill_content += (
"- `references/codebase_analysis/patterns/` - Design patterns detected\n"
)
if c3_data.get("test_examples"):
skill_content += "- `references/codebase_analysis/examples/` - Test examples extracted\n"
skill_content += (
"- `references/codebase_analysis/examples/` - Test examples extracted\n"
)
if c3_data.get("config_patterns"):
skill_content += "- `references/codebase_analysis/configuration/` - Configuration analysis\n"
skill_content += (
"- `references/codebase_analysis/configuration/` - Configuration analysis\n"
)
if c3_data.get("architecture"):
skill_content += "- `references/codebase_analysis/ARCHITECTURE.md` - Architecture overview\n"
skill_content += (
"- `references/codebase_analysis/ARCHITECTURE.md` - Architecture overview\n"
)
# Usage
skill_content += "\n## 💻 Usage\n\n"
@@ -1020,7 +1069,9 @@ Use this skill when you need to:
lines = []
for release in releases[:3]:
lines.append(f"- **{release['tag_name']}** ({release['published_at'][:10]}): {release['name']}")
lines.append(
f"- **{release['tag_name']}** ({release['published_at'][:10]}): {release['name']}"
)
return "\n".join(lines)
@@ -1132,7 +1183,9 @@ Use this skill when you need to:
if patterns:
content += "**Architectural Patterns:**\n"
for pattern in patterns[:5]:
content += f"- {pattern.get('name', 'Unknown')}: {pattern.get('description', 'N/A')}\n"
content += (
f"- {pattern.get('name', 'Unknown')}: {pattern.get('description', 'N/A')}\n"
)
content += "\n"
# Dependencies (C2.6)
@@ -1233,7 +1286,9 @@ Use this skill when you need to:
"""Generate releases.md reference file."""
releases = self.data["releases"]
content = f"# Releases\n\nVersion history for this repository ({len(releases)} releases).\n\n"
content = (
f"# Releases\n\nVersion history for this repository ({len(releases)} releases).\n\n"
)
for release in releases:
content += f"## {release['tag_name']}: {release['name']}\n"
@@ -1294,14 +1349,22 @@ Examples:
parser.add_argument("--max-issues", type=int, default=100, help="Max issues to fetch")
parser.add_argument("--scrape-only", action="store_true", help="Only scrape, don't build skill")
parser.add_argument(
"--enhance", action="store_true", help="Enhance SKILL.md using Claude API after building (requires API key)"
"--enhance",
action="store_true",
help="Enhance SKILL.md using Claude API after building (requires API key)",
)
parser.add_argument(
"--enhance-local", action="store_true", help="Enhance SKILL.md using Claude Code (no API key needed)"
"--enhance-local",
action="store_true",
help="Enhance SKILL.md using Claude Code (no API key needed)",
)
parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)")
parser.add_argument(
"--non-interactive", action="store_true", help="Non-interactive mode for CI/CD (fail fast on rate limits)"
"--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
)
parser.add_argument(
"--non-interactive",
action="store_true",
help="Non-interactive mode for CI/CD (fail fast on rate limits)",
)
parser.add_argument("--profile", type=str, help="GitHub profile name to use from config")
@@ -1368,7 +1431,9 @@ Examples:
api_key = args.api_key or os.environ.get("ANTHROPIC_API_KEY")
if not api_key:
logger.error("❌ ANTHROPIC_API_KEY not set. Use --api-key or set environment variable.")
logger.error(
"❌ ANTHROPIC_API_KEY not set. Use --api-key or set environment variable."
)
logger.info("💡 Tip: Use --enhance-local instead (no API key needed)")
else:
# Import and run API enhancement
@@ -1378,7 +1443,9 @@ Examples:
enhance_skill_md(skill_dir, api_key)
logger.info("✅ API enhancement complete!")
except ImportError:
logger.error("❌ API enhancement not available. Install: pip install anthropic")
logger.error(
"❌ API enhancement not available. Install: pip install anthropic"
)
logger.info("💡 Tip: Use --enhance-local instead (no API key needed)")
logger.info(f"\n✅ Success! Skill created at: {skill_dir}/")

View File

@@ -92,7 +92,9 @@ class GuideEnhancer:
self.client = anthropic.Anthropic(api_key=self.api_key)
logger.info("✨ GuideEnhancer initialized in API mode")
else:
logger.warning("⚠️ API mode requested but anthropic library not available or no API key")
logger.warning(
"⚠️ API mode requested but anthropic library not available or no API key"
)
self.mode = "none"
elif self.mode == "local":
# Check if claude CLI is available
@@ -133,7 +135,9 @@ class GuideEnhancer:
def _check_claude_cli(self) -> bool:
"""Check if Claude Code CLI is available."""
try:
result = subprocess.run(["claude", "--version"], capture_output=True, text=True, timeout=5)
result = subprocess.run(
["claude", "--version"], capture_output=True, text=True, timeout=5
)
return result.returncode == 0
except (FileNotFoundError, subprocess.TimeoutExpired):
return False
@@ -251,7 +255,9 @@ class GuideEnhancer:
try:
data = json.loads(response)
return [
PrerequisiteItem(name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", ""))
PrerequisiteItem(
name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", "")
)
for item in data.get("prerequisites_detailed", [])
]
except (json.JSONDecodeError, KeyError) as e:
@@ -345,7 +351,9 @@ class GuideEnhancer:
try:
response = self.client.messages.create(
model="claude-sonnet-4-20250514", max_tokens=max_tokens, messages=[{"role": "user", "content": prompt}]
model="claude-sonnet-4-20250514",
max_tokens=max_tokens,
messages=[{"role": "user", "content": prompt}],
)
return response.content[0].text
except Exception as e:
@@ -690,7 +698,11 @@ IMPORTANT: Return ONLY valid JSON.
# Prerequisites
if "prerequisites_detailed" in data:
enhanced["prerequisites_detailed"] = [
PrerequisiteItem(name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", ""))
PrerequisiteItem(
name=item.get("name", ""),
why=item.get("why", ""),
setup=item.get("setup", ""),
)
for item in data["prerequisites_detailed"]
]

View File

@@ -140,7 +140,9 @@ class GuideCollection:
return {
"total_guides": self.total_guides,
"guides_by_complexity": self.guides_by_complexity,
"guides_by_use_case": {k: [g.to_dict() for g in v] for k, v in self.guides_by_use_case.items()},
"guides_by_use_case": {
k: [g.to_dict() for g in v] for k, v in self.guides_by_use_case.items()
},
"guides": [g.to_dict() for g in self.guides],
}
@@ -224,7 +226,10 @@ class WorkflowAnalyzer:
steps.append(
WorkflowStep(
step_number=step_num, code=step_code, description=description, verification=verification
step_number=step_num,
code=step_code,
description=description,
verification=verification,
)
)
step_num += 1
@@ -253,7 +258,9 @@ class WorkflowAnalyzer:
step_code = "\n".join(current_step)
description = self._infer_description_from_code(step_code)
steps.append(WorkflowStep(step_number=step_num, code=step_code, description=description))
steps.append(
WorkflowStep(step_number=step_num, code=step_code, description=description)
)
step_num += 1
current_step = []
continue
@@ -264,7 +271,9 @@ class WorkflowAnalyzer:
if current_step:
step_code = "\n".join(current_step)
description = self._infer_description_from_code(step_code)
steps.append(WorkflowStep(step_number=step_num, code=step_code, description=description))
steps.append(
WorkflowStep(step_number=step_num, code=step_code, description=description)
)
return steps
@@ -400,7 +409,9 @@ class WorkflowAnalyzer:
class WorkflowGrouper:
"""Group related workflows into coherent guides"""
def group_workflows(self, workflows: list[dict], strategy: str = "ai-tutorial-group") -> dict[str, list[dict]]:
def group_workflows(
self, workflows: list[dict], strategy: str = "ai-tutorial-group"
) -> dict[str, list[dict]]:
"""
Group workflows using specified strategy.
@@ -854,7 +865,9 @@ class HowToGuideBuilder:
if not workflows:
logger.warning("No workflow examples found!")
return GuideCollection(total_guides=0, guides_by_complexity={}, guides_by_use_case={}, guides=[])
return GuideCollection(
total_guides=0, guides_by_complexity={}, guides_by_use_case={}, guides=[]
)
# Group workflows
grouped_workflows = self.grouper.group_workflows(workflows, grouping_strategy)
@@ -914,7 +927,9 @@ class HowToGuideBuilder:
# Extract source files
source_files = [w.get("file_path", "") for w in workflows]
source_files = [f"{Path(f).name}:{w.get('line_start', 0)}" for f, w in zip(source_files, workflows)]
source_files = [
f"{Path(f).name}:{w.get('line_start', 0)}" for f, w in zip(source_files, workflows)
]
# Create guide
guide = HowToGuide(
@@ -1126,9 +1141,13 @@ Grouping Strategies:
""",
)
parser.add_argument("input", nargs="?", help="Input: directory with test files OR test_examples.json file")
parser.add_argument(
"input", nargs="?", help="Input: directory with test files OR test_examples.json file"
)
parser.add_argument("--input", dest="input_file", help="Input JSON file with test examples (from C3.2)")
parser.add_argument(
"--input", dest="input_file", help="Input JSON file with test examples (from C3.2)"
)
parser.add_argument(
"--output",
@@ -1145,7 +1164,9 @@ Grouping Strategies:
parser.add_argument("--no-ai", action="store_true", help="Disable AI enhancement")
parser.add_argument("--json-output", action="store_true", help="Output JSON summary instead of markdown files")
parser.add_argument(
"--json-output", action="store_true", help="Output JSON summary instead of markdown files"
)
args = parser.parse_args()
@@ -1191,7 +1212,9 @@ Grouping Strategies:
builder = HowToGuideBuilder(enhance_with_ai=not args.no_ai)
output_dir = Path(args.output) if not args.json_output else None
collection = builder.build_guides_from_examples(examples, grouping_strategy=args.group_by, output_dir=output_dir)
collection = builder.build_guides_from_examples(
examples, grouping_strategy=args.group_by, output_dir=output_dir
)
# Output results
if args.json_output:

View File

@@ -366,11 +366,17 @@ Supported agents:
parser.add_argument("skill_directory", help="Path to skill directory (e.g., output/react/)")
parser.add_argument("--agent", required=True, help="Agent name (use 'all' to install to all agents)")
parser.add_argument(
"--agent", required=True, help="Agent name (use 'all' to install to all agents)"
)
parser.add_argument("--force", action="store_true", help="Overwrite existing installation without asking")
parser.add_argument(
"--force", action="store_true", help="Overwrite existing installation without asking"
)
parser.add_argument("--dry-run", action="store_true", help="Preview installation without making changes")
parser.add_argument(
"--dry-run", action="store_true", help="Preview installation without making changes"
)
args = parser.parse_args()
@@ -442,7 +448,9 @@ Supported agents:
if args.dry_run:
print("\n🔍 DRY RUN MODE - No changes will be made\n")
success, message = install_to_agent(skill_dir, agent_name, force=args.force, dry_run=args.dry_run)
success, message = install_to_agent(
skill_dir, agent_name, force=args.force, dry_run=args.dry_run
)
print(message)

View File

@@ -37,6 +37,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
# Import the MCP tool function (with lazy loading)
try:
from skill_seekers.mcp.server import install_skill_tool
MCP_AVAILABLE = True
except ImportError:
MCP_AVAILABLE = False
@@ -99,15 +100,23 @@ Phases:
)
parser.add_argument(
"--config", required=True, help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')"
"--config",
required=True,
help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')",
)
parser.add_argument("--destination", default="output", help="Output directory for skill files (default: output/)")
parser.add_argument(
"--destination",
default="output",
help="Output directory for skill files (default: output/)",
)
parser.add_argument("--no-upload", action="store_true", help="Skip automatic upload to Claude")
parser.add_argument(
"--unlimited", action="store_true", help="Remove page limits during scraping (WARNING: Can take hours)"
"--unlimited",
action="store_true",
help="Remove page limits during scraping (WARNING: Can take hours)",
)
parser.add_argument("--dry-run", action="store_true", help="Preview workflow without executing")

View File

@@ -17,10 +17,15 @@ logger = logging.getLogger(__name__)
try:
from skill_seekers.cli.swift_patterns import SWIFT_PATTERNS
except ImportError as e:
logger.warning("Swift language detection patterns unavailable. Swift code detection will be disabled. Error: %s", e)
logger.warning(
"Swift language detection patterns unavailable. Swift code detection will be disabled. Error: %s",
e,
)
SWIFT_PATTERNS: dict[str, list[tuple[str, int]]] = {}
except Exception as e:
logger.error("Failed to load Swift patterns due to unexpected error: %s. Swift detection disabled.", e)
logger.error(
"Failed to load Swift patterns due to unexpected error: %s. Swift detection disabled.", e
)
SWIFT_PATTERNS: dict[str, list[tuple[str, int]]] = {}
# Verify Swift patterns were loaded correctly
@@ -35,7 +40,8 @@ elif "swift" not in SWIFT_PATTERNS:
)
else:
logger.info(
"Swift patterns loaded successfully: %d patterns for language detection", len(SWIFT_PATTERNS.get("swift", []))
"Swift patterns loaded successfully: %d patterns for language detection",
len(SWIFT_PATTERNS.get("swift", [])),
)
# Comprehensive language patterns with weighted confidence scoring
@@ -473,7 +479,8 @@ class LanguageDetector:
self._pattern_cache[lang] = compiled_patterns
else:
logger.warning(
"No valid patterns compiled for language '%s'. Detection for this language is disabled.", lang
"No valid patterns compiled for language '%s'. Detection for this language is disabled.",
lang,
)
def detect_from_html(self, elem, code: str) -> tuple[str, float]:

View File

@@ -98,7 +98,9 @@ class LlmsTxtDownloader:
print(f" Retrying in {delay}s...")
time.sleep(delay)
else:
print(f"❌ Failed to download {self.url} after {self.max_retries} attempts: {e}")
print(
f"❌ Failed to download {self.url} after {self.max_retries} attempts: {e}"
)
return None
return None

View File

@@ -135,7 +135,11 @@ class LlmsTxtParser:
headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE)
for level_markers, text in headings:
page["headings"].append(
{"level": f"h{len(level_markers)}", "text": text.strip(), "id": text.lower().replace(" ", "-")}
{
"level": f"h{len(level_markers)}",
"text": text.strip(),
"id": text.lower().replace(" ", "-"),
}
)
# Remove code blocks from content for plain text

View File

@@ -66,52 +66,79 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
parser.add_argument("--version", action="version", version="%(prog)s 2.7.0")
subparsers = parser.add_subparsers(
dest="command", title="commands", description="Available Skill Seekers commands", help="Command to run"
dest="command",
title="commands",
description="Available Skill Seekers commands",
help="Command to run",
)
# === config subcommand ===
config_parser = subparsers.add_parser(
"config", help="Configure GitHub tokens, API keys, and settings", description="Interactive configuration wizard"
"config",
help="Configure GitHub tokens, API keys, and settings",
description="Interactive configuration wizard",
)
config_parser.add_argument(
"--github", action="store_true", help="Go directly to GitHub token setup"
)
config_parser.add_argument(
"--api-keys", action="store_true", help="Go directly to API keys setup"
)
config_parser.add_argument(
"--show", action="store_true", help="Show current configuration and exit"
)
config_parser.add_argument("--github", action="store_true", help="Go directly to GitHub token setup")
config_parser.add_argument("--api-keys", action="store_true", help="Go directly to API keys setup")
config_parser.add_argument("--show", action="store_true", help="Show current configuration and exit")
config_parser.add_argument("--test", action="store_true", help="Test connections and exit")
# === scrape subcommand ===
scrape_parser = subparsers.add_parser(
"scrape", help="Scrape documentation website", description="Scrape documentation website and generate skill"
"scrape",
help="Scrape documentation website",
description="Scrape documentation website and generate skill",
)
scrape_parser.add_argument("--config", help="Config JSON file")
scrape_parser.add_argument("--name", help="Skill name")
scrape_parser.add_argument("--url", help="Documentation URL")
scrape_parser.add_argument("--description", help="Skill description")
scrape_parser.add_argument("--skip-scrape", action="store_true", help="Skip scraping, use cached data")
scrape_parser.add_argument(
"--skip-scrape", action="store_true", help="Skip scraping, use cached data"
)
scrape_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
scrape_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)")
scrape_parser.add_argument(
"--enhance-local", action="store_true", help="AI enhancement (local)"
)
scrape_parser.add_argument("--dry-run", action="store_true", help="Dry run mode")
scrape_parser.add_argument("--async", dest="async_mode", action="store_true", help="Use async scraping")
scrape_parser.add_argument(
"--async", dest="async_mode", action="store_true", help="Use async scraping"
)
scrape_parser.add_argument("--workers", type=int, help="Number of async workers")
# === github subcommand ===
github_parser = subparsers.add_parser(
"github", help="Scrape GitHub repository", description="Scrape GitHub repository and generate skill"
"github",
help="Scrape GitHub repository",
description="Scrape GitHub repository and generate skill",
)
github_parser.add_argument("--config", help="Config JSON file")
github_parser.add_argument("--repo", help="GitHub repo (owner/repo)")
github_parser.add_argument("--name", help="Skill name")
github_parser.add_argument("--description", help="Skill description")
github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
github_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)")
github_parser.add_argument(
"--enhance-local", action="store_true", help="AI enhancement (local)"
)
github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance")
github_parser.add_argument(
"--non-interactive", action="store_true", help="Non-interactive mode (fail fast on rate limits)"
"--non-interactive",
action="store_true",
help="Non-interactive mode (fail fast on rate limits)",
)
github_parser.add_argument("--profile", type=str, help="GitHub profile name from config")
# === pdf subcommand ===
pdf_parser = subparsers.add_parser(
"pdf", help="Extract from PDF file", description="Extract content from PDF and generate skill"
"pdf",
help="Extract from PDF file",
description="Extract content from PDF and generate skill",
)
pdf_parser.add_argument("--config", help="Config JSON file")
pdf_parser.add_argument("--pdf", help="PDF file path")
@@ -138,7 +165,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
enhance_parser.add_argument("skill_directory", help="Skill directory path")
enhance_parser.add_argument("--background", action="store_true", help="Run in background")
enhance_parser.add_argument("--daemon", action="store_true", help="Run as daemon")
enhance_parser.add_argument("--no-force", action="store_true", help="Disable force mode (enable confirmations)")
enhance_parser.add_argument(
"--no-force", action="store_true", help="Disable force mode (enable confirmations)"
)
enhance_parser.add_argument("--timeout", type=int, default=600, help="Timeout in seconds")
# === enhance-status subcommand ===
@@ -148,13 +177,19 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
description="Monitor background enhancement processes",
)
enhance_status_parser.add_argument("skill_directory", help="Skill directory path")
enhance_status_parser.add_argument("--watch", "-w", action="store_true", help="Watch in real-time")
enhance_status_parser.add_argument(
"--watch", "-w", action="store_true", help="Watch in real-time"
)
enhance_status_parser.add_argument("--json", action="store_true", help="JSON output")
enhance_status_parser.add_argument("--interval", type=int, default=2, help="Watch interval in seconds")
enhance_status_parser.add_argument(
"--interval", type=int, default=2, help="Watch interval in seconds"
)
# === package subcommand ===
package_parser = subparsers.add_parser(
"package", help="Package skill into .zip file", description="Package skill directory into uploadable .zip"
"package",
help="Package skill into .zip file",
description="Package skill directory into uploadable .zip",
)
package_parser.add_argument("skill_directory", help="Skill directory path")
package_parser.add_argument("--no-open", action="store_true", help="Don't open output folder")
@@ -162,7 +197,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
# === upload subcommand ===
upload_parser = subparsers.add_parser(
"upload", help="Upload skill to Claude", description="Upload .zip file to Claude via Anthropic API"
"upload",
help="Upload skill to Claude",
description="Upload .zip file to Claude via Anthropic API",
)
upload_parser.add_argument("zip_file", help=".zip file to upload")
upload_parser.add_argument("--api-key", help="Anthropic API key")
@@ -183,17 +220,26 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
help="Extract usage examples from test files",
description="Analyze test files to extract real API usage patterns",
)
test_examples_parser.add_argument("directory", nargs="?", help="Directory containing test files")
test_examples_parser.add_argument("--file", help="Single test file to analyze")
test_examples_parser.add_argument("--language", help="Filter by programming language (python, javascript, etc.)")
test_examples_parser.add_argument(
"--min-confidence", type=float, default=0.5, help="Minimum confidence threshold (0.0-1.0, default: 0.5)"
"directory", nargs="?", help="Directory containing test files"
)
test_examples_parser.add_argument("--file", help="Single test file to analyze")
test_examples_parser.add_argument(
"--language", help="Filter by programming language (python, javascript, etc.)"
)
test_examples_parser.add_argument(
"--min-confidence",
type=float,
default=0.5,
help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
)
test_examples_parser.add_argument(
"--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)"
)
test_examples_parser.add_argument("--json", action="store_true", help="Output JSON format")
test_examples_parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
test_examples_parser.add_argument(
"--markdown", action="store_true", help="Output Markdown format"
)
# === install-agent subcommand ===
install_agent_parser = subparsers.add_parser(
@@ -201,9 +247,13 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
help="Install skill to AI agent directories",
description="Copy skill to agent-specific installation directories",
)
install_agent_parser.add_argument("skill_directory", help="Skill directory path (e.g., output/react/)")
install_agent_parser.add_argument(
"--agent", required=True, help="Agent name (claude, cursor, vscode, amp, goose, opencode, all)"
"skill_directory", help="Skill directory path (e.g., output/react/)"
)
install_agent_parser.add_argument(
"--agent",
required=True,
help="Agent name (claude, cursor, vscode, amp, goose, opencode, all)",
)
install_agent_parser.add_argument(
"--force", action="store_true", help="Overwrite existing installation without asking"
@@ -219,18 +269,32 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
description="One-command skill installation (AI enhancement MANDATORY)",
)
install_parser.add_argument(
"--config", required=True, help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')"
"--config",
required=True,
help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')",
)
install_parser.add_argument(
"--destination", default="output", help="Output directory (default: output/)"
)
install_parser.add_argument(
"--no-upload", action="store_true", help="Skip automatic upload to Claude"
)
install_parser.add_argument(
"--unlimited", action="store_true", help="Remove page limits during scraping"
)
install_parser.add_argument(
"--dry-run", action="store_true", help="Preview workflow without executing"
)
install_parser.add_argument("--destination", default="output", help="Output directory (default: output/)")
install_parser.add_argument("--no-upload", action="store_true", help="Skip automatic upload to Claude")
install_parser.add_argument("--unlimited", action="store_true", help="Remove page limits during scraping")
install_parser.add_argument("--dry-run", action="store_true", help="Preview workflow without executing")
# === resume subcommand ===
resume_parser = subparsers.add_parser(
"resume", help="Resume interrupted scraping job", description="Continue from saved progress checkpoint"
"resume",
help="Resume interrupted scraping job",
description="Continue from saved progress checkpoint",
)
resume_parser.add_argument(
"job_id", nargs="?", help="Job ID to resume (or use --list to see available jobs)"
)
resume_parser.add_argument("job_id", nargs="?", help="Job ID to resume (or use --list to see available jobs)")
resume_parser.add_argument("--list", action="store_true", help="List all resumable jobs")
resume_parser.add_argument("--clean", action="store_true", help="Clean up old progress files")

View File

@@ -38,7 +38,9 @@ logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def categorize_issues_by_topic(problems: list[dict], solutions: list[dict], topics: list[str]) -> dict[str, list[dict]]:
def categorize_issues_by_topic(
problems: list[dict], solutions: list[dict], topics: list[str]
) -> dict[str, list[dict]]:
"""
Categorize GitHub issues by topic keywords.
@@ -85,7 +87,10 @@ def categorize_issues_by_topic(problems: list[dict], solutions: list[dict], topi
def generate_hybrid_content(
api_data: dict, github_docs: dict | None, github_insights: dict | None, conflicts: list[Conflict]
api_data: dict,
github_docs: dict | None,
github_insights: dict | None,
conflicts: list[Conflict],
) -> dict[str, Any]:
"""
Generate hybrid content combining API data with GitHub context.
@@ -133,7 +138,11 @@ def generate_hybrid_content(
hybrid["github_context"]["top_labels"] = github_insights.get("top_labels", [])
# Add conflict summary
hybrid["conflict_summary"] = {"total_conflicts": len(conflicts), "by_type": {}, "by_severity": {}}
hybrid["conflict_summary"] = {
"total_conflicts": len(conflicts),
"by_type": {},
"by_severity": {},
}
for conflict in conflicts:
# Count by type
@@ -159,7 +168,9 @@ def generate_hybrid_content(
return hybrid
def _match_issues_to_apis(apis: dict[str, dict], problems: list[dict], solutions: list[dict]) -> dict[str, list[dict]]:
def _match_issues_to_apis(
apis: dict[str, dict], problems: list[dict], solutions: list[dict]
) -> dict[str, list[dict]]:
"""
Match GitHub issues to specific APIs by keyword matching.
@@ -651,7 +662,12 @@ read -p "Press Enter when merge is complete..."
# Open new terminal with Claude Code
# Try different terminal emulators
terminals = [["x-terminal-emulator", "-e"], ["gnome-terminal", "--"], ["xterm", "-e"], ["konsole", "-e"]]
terminals = [
["x-terminal-emulator", "-e"],
["gnome-terminal", "--"],
["xterm", "-e"],
["konsole", "-e"],
]
for terminal_cmd in terminals:
try:
@@ -735,7 +751,9 @@ def merge_sources(
if github_streams:
logger.info("GitHub streams available for multi-layer merge")
if github_streams.docs_stream:
logger.info(f" - Docs stream: README, {len(github_streams.docs_stream.docs_files)} docs files")
logger.info(
f" - Docs stream: README, {len(github_streams.docs_stream.docs_files)} docs files"
)
if github_streams.insights_stream:
problems = len(github_streams.insights_stream.common_problems)
solutions = len(github_streams.insights_stream.known_solutions)
@@ -766,7 +784,11 @@ if __name__ == "__main__":
parser.add_argument("github_data", help="Path to GitHub data JSON")
parser.add_argument("--output", "-o", default="merged_data.json", help="Output file path")
parser.add_argument(
"--mode", "-m", choices=["rule-based", "claude-enhanced"], default="rule-based", help="Merge mode"
"--mode",
"-m",
choices=["rule-based", "claude-enhanced"],
default="rule-based",
help="Merge mode",
)
args = parser.parse_args()

View File

@@ -17,12 +17,22 @@ from pathlib import Path
# Import utilities
try:
from quality_checker import SkillQualityChecker, print_report
from utils import format_file_size, open_folder, print_upload_instructions, validate_skill_directory
from utils import (
format_file_size,
open_folder,
print_upload_instructions,
validate_skill_directory,
)
except ImportError:
# If running from different directory, add cli to path
sys.path.insert(0, str(Path(__file__).parent))
from quality_checker import SkillQualityChecker, print_report
from utils import format_file_size, open_folder, print_upload_instructions, validate_skill_directory
from utils import (
format_file_size,
open_folder,
print_upload_instructions,
validate_skill_directory,
)
def package_skill(skill_dir, open_folder_after=True, skip_quality_check=False, target="claude"):
@@ -135,9 +145,13 @@ Examples:
parser.add_argument("skill_dir", help="Path to skill directory (e.g., output/react/)")
parser.add_argument("--no-open", action="store_true", help="Do not open the output folder after packaging")
parser.add_argument(
"--no-open", action="store_true", help="Do not open the output folder after packaging"
)
parser.add_argument("--skip-quality-check", action="store_true", help="Skip quality checks before packaging")
parser.add_argument(
"--skip-quality-check", action="store_true", help="Skip quality checks before packaging"
)
parser.add_argument(
"--target",
@@ -147,7 +161,9 @@ Examples:
)
parser.add_argument(
"--upload", action="store_true", help="Automatically upload after packaging (requires platform API key)"
"--upload",
action="store_true",
help="Automatically upload after packaging (requires platform API key)",
)
args = parser.parse_args()

View File

@@ -135,7 +135,9 @@ class BasePatternDetector:
# Default: no deep detection
return None
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None:
def detect_full(
self, class_sig, all_classes: list, file_content: str
) -> PatternInstance | None:
"""
Full detection using behavioral analysis.
@@ -150,7 +152,9 @@ class BasePatternDetector:
# Default: no full detection
return None
def detect(self, class_sig, all_classes: list, file_content: str | None = None) -> PatternInstance | None:
def detect(
self, class_sig, all_classes: list, file_content: str | None = None
) -> PatternInstance | None:
"""
Detect pattern based on configured depth.
@@ -273,7 +277,9 @@ class PatternRecognizer:
for class_sig in class_sigs:
for detector in self.detectors:
pattern = detector.detect(
class_sig=class_sig, all_classes=class_sigs, file_content=content if self.depth == "full" else None
class_sig=class_sig,
all_classes=class_sigs,
file_content=content if self.depth == "full" else None,
)
if pattern:
@@ -327,7 +333,9 @@ class PatternRecognizer:
params = []
for param in method.get("parameters", []):
param_obj = SimpleNamespace(
name=param.get("name", ""), type_hint=param.get("type_hint"), default=param.get("default")
name=param.get("name", ""),
type_hint=param.get("type_hint"),
default=param.get("default"),
)
params.append(param_obj)
@@ -397,7 +405,14 @@ class SingletonDetector(BasePatternDetector):
confidence = 0.0
# Check for instance method (getInstance, instance, get_instance, etc.)
instance_methods = ["getInstance", "instance", "get_instance", "Instance", "GetInstance", "INSTANCE"]
instance_methods = [
"getInstance",
"instance",
"get_instance",
"Instance",
"GetInstance",
"INSTANCE",
]
has_instance_method = False
for method in class_sig.methods:
@@ -438,7 +453,9 @@ class SingletonDetector(BasePatternDetector):
# Fallback to surface detection
return self.detect_surface(class_sig, all_classes)
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None:
def detect_full(
self, class_sig, all_classes: list, file_content: str
) -> PatternInstance | None:
"""
Full behavioral analysis for Singleton.
@@ -767,7 +784,9 @@ class StrategyDetector(BasePatternDetector):
siblings = [
cls.name
for cls in all_classes
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name
if cls.base_classes
and base_class in cls.base_classes
and cls.name != class_sig.name
]
if siblings:
@@ -885,7 +904,9 @@ class DecoratorDetector(BasePatternDetector):
siblings = [
cls.name
for cls in all_classes
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name
if cls.base_classes
and base_class in cls.base_classes
and cls.name != class_sig.name
]
if siblings:
@@ -898,7 +919,10 @@ class DecoratorDetector(BasePatternDetector):
# Check if takes object parameter (not just self)
if len(init_method.parameters) > 1: # More than just 'self'
param_names = [p.name for p in init_method.parameters if p.name != "self"]
if any(name in ["wrapped", "component", "inner", "obj", "target"] for name in param_names):
if any(
name in ["wrapped", "component", "inner", "obj", "target"]
for name in param_names
):
evidence.append(f"Takes wrapped object in constructor: {param_names}")
confidence += 0.4
@@ -969,7 +993,8 @@ class BuilderDetector(BasePatternDetector):
# Check for build/create terminal method
terminal_methods = ["build", "create", "execute", "construct", "make"]
has_terminal = any(
m.name.lower() in terminal_methods or m.name.lower().startswith("build") for m in class_sig.methods
m.name.lower() in terminal_methods or m.name.lower().startswith("build")
for m in class_sig.methods
)
if has_terminal:
@@ -979,7 +1004,9 @@ class BuilderDetector(BasePatternDetector):
# Check for setter methods (with_, set_, add_)
setter_prefixes = ["with", "set", "add", "configure"]
setter_count = sum(
1 for m in class_sig.methods if any(m.name.lower().startswith(prefix) for prefix in setter_prefixes)
1
for m in class_sig.methods
if any(m.name.lower().startswith(prefix) for prefix in setter_prefixes)
)
if setter_count >= 3:
@@ -1006,7 +1033,9 @@ class BuilderDetector(BasePatternDetector):
# Fallback to surface
return self.detect_surface(class_sig, all_classes)
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None:
def detect_full(
self, class_sig, all_classes: list, file_content: str
) -> PatternInstance | None:
"""Full behavioral analysis for Builder"""
# Start with deep detection
pattern = self.detect_deep(class_sig, all_classes)
@@ -1186,7 +1215,9 @@ class CommandDetector(BasePatternDetector):
has_execute = any(m.name.lower() in execute_methods for m in class_sig.methods)
if has_execute:
method_name = next(m.name for m in class_sig.methods if m.name.lower() in execute_methods)
method_name = next(
m.name for m in class_sig.methods if m.name.lower() in execute_methods
)
evidence.append(f"Has execute method: {method_name}()")
confidence += 0.5
@@ -1299,7 +1330,9 @@ class TemplateMethodDetector(BasePatternDetector):
]
hook_methods = [
m.name for m in class_sig.methods if any(keyword in m.name.lower() for keyword in hook_keywords)
m.name
for m in class_sig.methods
if any(keyword in m.name.lower() for keyword in hook_keywords)
]
if len(hook_methods) >= 2:
@@ -1307,7 +1340,11 @@ class TemplateMethodDetector(BasePatternDetector):
confidence += 0.3
# Check for abstract methods (no implementation or pass/raise)
abstract_methods = [m.name for m in class_sig.methods if m.name.startswith("_") or "abstract" in m.name.lower()]
abstract_methods = [
m.name
for m in class_sig.methods
if m.name.startswith("_") or "abstract" in m.name.lower()
]
if abstract_methods:
evidence.append(f"Has abstract methods: {', '.join(abstract_methods[:2])}")
@@ -1383,7 +1420,8 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
# Check for handle/process method
handle_methods = ["handle", "process", "execute", "filter", "middleware"]
has_handle = any(
m.name.lower() in handle_methods or m.name.lower().startswith("handle") for m in class_sig.methods
m.name.lower() in handle_methods or m.name.lower().startswith("handle")
for m in class_sig.methods
)
if has_handle:
@@ -1405,7 +1443,8 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
# Check for set_next() method
has_set_next = any(
"next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower()) for m in class_sig.methods
"next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower())
for m in class_sig.methods
)
if has_set_next:
@@ -1419,7 +1458,9 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
siblings = [
cls.name
for cls in all_classes
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name
if cls.base_classes
and base_class in cls.base_classes
and cls.name != class_sig.name
]
if siblings and has_next_ref:
@@ -1625,16 +1666,22 @@ Supported Languages:
""",
)
parser.add_argument("--file", action="append", help="Source file to analyze (can be specified multiple times)")
parser.add_argument(
"--file", action="append", help="Source file to analyze (can be specified multiple times)"
)
parser.add_argument("--directory", help="Directory to analyze (analyzes all source files)")
parser.add_argument("--output", help="Output directory for results (default: current directory)")
parser.add_argument(
"--output", help="Output directory for results (default: current directory)"
)
parser.add_argument(
"--depth",
choices=["surface", "deep", "full"],
default="deep",
help="Detection depth: surface (fast), deep (default), full (thorough)",
)
parser.add_argument("--json", action="store_true", help="Output JSON format instead of human-readable")
parser.add_argument(
"--json", action="store_true", help="Output JSON format instead of human-readable"
)
parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
args = parser.parse_args()
@@ -1697,7 +1744,9 @@ Supported Languages:
if not args.json and args.verbose:
print(f"\n{file_path}:")
for pattern in report.patterns:
print(f" [{pattern.pattern_type}] {pattern.class_name} (confidence: {pattern.confidence:.2f})")
print(
f" [{pattern.pattern_type}] {pattern.class_name} (confidence: {pattern.confidence:.2f})"
)
except Exception as e:
if args.verbose:
@@ -1737,11 +1786,15 @@ Supported Languages:
pattern_counts = {}
for report in all_reports:
for pattern in report.patterns:
pattern_counts[pattern.pattern_type] = pattern_counts.get(pattern.pattern_type, 0) + 1
pattern_counts[pattern.pattern_type] = (
pattern_counts.get(pattern.pattern_type, 0) + 1
)
if pattern_counts:
print("Pattern Summary:")
for pattern_type, count in sorted(pattern_counts.items(), key=lambda x: x[1], reverse=True):
for pattern_type, count in sorted(
pattern_counts.items(), key=lambda x: x[1], reverse=True
):
print(f" {pattern_type}: {count}")
print()

View File

@@ -196,7 +196,9 @@ class PDFExtractor:
"col_count": len(tab.extract()[0]) if tab.extract() else 0,
}
tables.append(table_data)
self.log(f" Found table {idx}: {table_data['row_count']}x{table_data['col_count']}")
self.log(
f" Found table {idx}: {table_data['row_count']}x{table_data['col_count']}"
)
except Exception as e:
self.log(f" Table extraction failed: {e}")
@@ -294,7 +296,9 @@ class PDFExtractor:
issues.append("May be natural language, not code")
# Check code/comment ratio
comment_lines = sum(1 for line in code.split("\n") if line.strip().startswith(("#", "//", "/*", "*", "--")))
comment_lines = sum(
1 for line in code.split("\n") if line.strip().startswith(("#", "//", "/*", "*", "--"))
)
total_lines = len([l for l in code.split("\n") if l.strip()])
if total_lines > 0 and comment_lines / total_lines > 0.7:
issues.append("Mostly comments")
@@ -501,11 +505,17 @@ class PDFExtractor:
# Common code patterns that span multiple lines
patterns = [
# Function definitions
(r"((?:def|function|func|fn|public|private)\s+\w+\s*\([^)]*\)\s*[{:]?[^}]*[}]?)", "function"),
(
r"((?:def|function|func|fn|public|private)\s+\w+\s*\([^)]*\)\s*[{:]?[^}]*[}]?)",
"function",
),
# Class definitions
(r"(class\s+\w+[^{]*\{[^}]*\})", "class"),
# Import statements block
(r"((?:import|require|use|include)[^\n]+(?:\n(?:import|require|use|include)[^\n]+)*)", "imports"),
(
r"((?:import|require|use|include)[^\n]+(?:\n(?:import|require|use|include)[^\n]+)*)",
"imports",
),
]
for pattern, block_type in patterns:
@@ -628,7 +638,15 @@ class PDFExtractor:
"""
if self.chunk_size == 0:
# No chunking - return all pages as one chunk
return [{"chunk_number": 1, "start_page": 1, "end_page": len(pages), "pages": pages, "chapter_title": None}]
return [
{
"chunk_number": 1,
"start_page": 1,
"end_page": len(pages),
"pages": pages,
"chapter_title": None,
}
]
chunks = []
current_chunk = []
@@ -812,7 +830,9 @@ class PDFExtractor:
code_samples = [c for c in code_samples if c["quality_score"] >= self.min_quality]
filtered_count = code_samples_before - len(code_samples)
if filtered_count > 0:
self.log(f" Filtered out {filtered_count} low-quality code blocks (min_quality={self.min_quality})")
self.log(
f" Filtered out {filtered_count} low-quality code blocks (min_quality={self.min_quality})"
)
# Sort by quality score (highest first)
code_samples.sort(key=lambda x: x["quality_score"], reverse=True)
@@ -891,7 +911,9 @@ class PDFExtractor:
# Show feature status
if self.use_ocr:
status = "✅ enabled" if TESSERACT_AVAILABLE else "⚠️ not available (install pytesseract)"
status = (
"✅ enabled" if TESSERACT_AVAILABLE else "⚠️ not available (install pytesseract)"
)
print(f" OCR: {status}")
if self.extract_tables:
print(" Table extraction: ✅ enabled")
@@ -905,7 +927,9 @@ class PDFExtractor:
# Extract each page (with parallel processing - Priority 3)
if self.parallel and CONCURRENT_AVAILABLE and len(self.doc) > 5:
print(f"🚀 Extracting {len(self.doc)} pages in parallel ({self.max_workers} workers)...")
print(
f"🚀 Extracting {len(self.doc)} pages in parallel ({self.max_workers} workers)..."
)
with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
page_numbers = list(range(len(self.doc)))
self.pages = list(executor.map(self.extract_page, page_numbers))
@@ -962,7 +986,11 @@ class PDFExtractor:
for chunk in chunks:
if chunk["chapter_title"]:
chapters.append(
{"title": chunk["chapter_title"], "start_page": chunk["start_page"], "end_page": chunk["end_page"]}
{
"title": chunk["chapter_title"],
"start_page": chunk["start_page"],
"end_page": chunk["end_page"],
}
)
result = {
@@ -1042,12 +1070,21 @@ Examples:
parser.add_argument("-o", "--output", help="Output JSON file path (default: print to stdout)")
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
parser.add_argument("--chunk-size", type=int, default=10, help="Pages per chunk (0 = no chunking, default: 10)")
parser.add_argument("--no-merge", action="store_true", help="Disable merging code blocks across pages")
parser.add_argument(
"--min-quality", type=float, default=0.0, help="Minimum code quality score (0-10, default: 0 = no filtering)"
"--chunk-size", type=int, default=10, help="Pages per chunk (0 = no chunking, default: 10)"
)
parser.add_argument(
"--no-merge", action="store_true", help="Disable merging code blocks across pages"
)
parser.add_argument(
"--min-quality",
type=float,
default=0.0,
help="Minimum code quality score (0-10, default: 0 = no filtering)",
)
parser.add_argument(
"--extract-images", action="store_true", help="Extract images to files (NEW in B1.5)"
)
parser.add_argument("--extract-images", action="store_true", help="Extract images to files (NEW in B1.5)")
parser.add_argument(
"--image-dir",
type=str,
@@ -1062,12 +1099,22 @@ Examples:
)
# Advanced features (Priority 2 & 3)
parser.add_argument("--ocr", action="store_true", help="Use OCR for scanned PDFs (requires pytesseract)")
parser.add_argument(
"--ocr", action="store_true", help="Use OCR for scanned PDFs (requires pytesseract)"
)
parser.add_argument("--password", type=str, default=None, help="Password for encrypted PDF")
parser.add_argument("--extract-tables", action="store_true", help="Extract tables from PDF (Priority 2)")
parser.add_argument("--parallel", action="store_true", help="Process pages in parallel (Priority 3)")
parser.add_argument("--workers", type=int, default=None, help="Number of parallel workers (default: CPU count)")
parser.add_argument("--no-cache", action="store_true", help="Disable caching of expensive operations")
parser.add_argument(
"--extract-tables", action="store_true", help="Extract tables from PDF (Priority 2)"
)
parser.add_argument(
"--parallel", action="store_true", help="Process pages in parallel (Priority 3)"
)
parser.add_argument(
"--workers", type=int, default=None, help="Number of parallel workers (default: CPU count)"
)
parser.add_argument(
"--no-cache", action="store_true", help="Disable caching of expensive operations"
)
args = parser.parse_args()

View File

@@ -54,7 +54,11 @@ def infer_description_from_pdf(pdf_metadata: dict = None, name: str = "") -> str
return f"Use when working with {title.lower()}"
# Improved fallback
return f"Use when referencing {name} documentation" if name else "Use when referencing this documentation"
return (
f"Use when referencing {name} documentation"
if name
else "Use when referencing this documentation"
)
class PDFToSkillConverter:
@@ -65,7 +69,9 @@ class PDFToSkillConverter:
self.name = config["name"]
self.pdf_path = config.get("pdf_path", "")
# Set initial description (will be improved after extraction if metadata available)
self.description = config.get("description", f"Use when referencing {self.name} documentation")
self.description = config.get(
"description", f"Use when referencing {self.name} documentation"
)
# Paths
self.skill_dir = f"output/{self.name}"
@@ -151,7 +157,10 @@ class PDFToSkillConverter:
if isinstance(first_value, list) and first_value and isinstance(first_value[0], dict):
# Already categorized - convert to expected format
for cat_key, pages in self.categories.items():
categorized[cat_key] = {"title": cat_key.replace("_", " ").title(), "pages": pages}
categorized[cat_key] = {
"title": cat_key.replace("_", " ").title(),
"pages": pages,
}
else:
# Keyword-based categorization
# Initialize categories
@@ -171,7 +180,8 @@ class PDFToSkillConverter:
score = sum(
1
for kw in keywords
if isinstance(kw, str) and (kw.lower() in text or kw.lower() in headings_text)
if isinstance(kw, str)
and (kw.lower() in text or kw.lower() in headings_text)
)
else:
score = 0
@@ -490,7 +500,13 @@ class PDFToSkillConverter:
for keyword in pattern_keywords:
if keyword in heading_text:
page_num = page.get("page_number", 0)
patterns.append({"type": keyword.title(), "heading": heading.get("text", ""), "page": page_num})
patterns.append(
{
"type": keyword.title(),
"heading": heading.get("text", ""),
"page": page_num,
}
)
break # Only add once per heading
if not patterns:
@@ -526,7 +542,8 @@ class PDFToSkillConverter:
def main():
parser = argparse.ArgumentParser(
description="Convert PDF documentation to Claude skill", formatter_class=argparse.RawDescriptionHelpFormatter
description="Convert PDF documentation to Claude skill",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--config", help="PDF config JSON file")
@@ -548,7 +565,10 @@ def main():
elif args.from_json:
# Build from extracted JSON
name = Path(args.from_json).stem.replace("_extracted", "")
config = {"name": name, "description": args.description or f"Use when referencing {name} documentation"}
config = {
"name": name,
"description": args.description or f"Use when referencing {name} documentation",
}
converter = PDFToSkillConverter(config)
converter.load_extracted_data(args.from_json)
converter.build_skill()
@@ -561,7 +581,12 @@ def main():
"name": args.name,
"pdf_path": args.pdf,
"description": args.description or f"Use when referencing {args.name} documentation",
"extract_options": {"chunk_size": 10, "min_quality": 5.0, "extract_images": True, "min_image_size": 100},
"extract_options": {
"chunk_size": 10,
"min_quality": 5.0,
"extract_images": True,
"min_image_size": 100,
},
}
# Create converter

View File

@@ -138,7 +138,9 @@ class SkillQualityChecker:
# Check references directory exists
if not self.references_dir.exists():
self.report.add_warning(
"structure", "references/ directory not found - skill may be incomplete", str(self.references_dir)
"structure",
"references/ directory not found - skill may be incomplete",
str(self.references_dir),
)
elif not list(self.references_dir.rglob("*.md")):
self.report.add_warning(
@@ -197,7 +199,9 @@ class SkillQualityChecker:
if sections < 4:
self.report.add_warning(
"enhancement", f"Only {sections} sections found - SKILL.md may be too basic", "SKILL.md"
"enhancement",
f"Only {sections} sections found - SKILL.md may be too basic",
"SKILL.md",
)
else:
self.report.add_info("enhancement", f"✓ Found {sections} sections", "SKILL.md")
@@ -211,7 +215,9 @@ class SkillQualityChecker:
# Check YAML frontmatter
if not content.startswith("---"):
self.report.add_error("content", "Missing YAML frontmatter - SKILL.md must start with ---", "SKILL.md", 1)
self.report.add_error(
"content", "Missing YAML frontmatter - SKILL.md must start with ---", "SKILL.md", 1
)
else:
# Extract frontmatter
try:
@@ -221,26 +227,38 @@ class SkillQualityChecker:
# Check for required fields
if "name:" not in frontmatter:
self.report.add_error("content", 'Missing "name:" field in YAML frontmatter', "SKILL.md", 2)
self.report.add_error(
"content", 'Missing "name:" field in YAML frontmatter', "SKILL.md", 2
)
# Check for description
if "description:" in frontmatter:
self.report.add_info("content", "✓ YAML frontmatter includes description", "SKILL.md")
self.report.add_info(
"content", "✓ YAML frontmatter includes description", "SKILL.md"
)
else:
self.report.add_error("content", "Invalid YAML frontmatter format", "SKILL.md", 1)
self.report.add_error(
"content", "Invalid YAML frontmatter format", "SKILL.md", 1
)
except Exception as e:
self.report.add_error("content", f"Error parsing YAML frontmatter: {e}", "SKILL.md", 1)
self.report.add_error(
"content", f"Error parsing YAML frontmatter: {e}", "SKILL.md", 1
)
# Check code block language tags
code_blocks_without_lang = re.findall(r"```\n[^`]", content)
if code_blocks_without_lang:
self.report.add_warning(
"content", f"Found {len(code_blocks_without_lang)} code blocks without language tags", "SKILL.md"
"content",
f"Found {len(code_blocks_without_lang)} code blocks without language tags",
"SKILL.md",
)
# Check for "When to Use" section
if "when to use" not in content.lower():
self.report.add_warning("content", 'Missing "When to Use This Skill" section', "SKILL.md")
self.report.add_warning(
"content", 'Missing "When to Use This Skill" section', "SKILL.md"
)
else:
self.report.add_info("content", '✓ Found "When to Use" section', "SKILL.md")
@@ -248,7 +266,9 @@ class SkillQualityChecker:
if self.references_dir.exists():
ref_files = list(self.references_dir.rglob("*.md"))
if ref_files:
self.report.add_info("content", f"✓ Found {len(ref_files)} reference files", "references/")
self.report.add_info(
"content", f"✓ Found {len(ref_files)} reference files", "references/"
)
# Check if references are mentioned in SKILL.md
mentioned_refs = 0
@@ -258,7 +278,9 @@ class SkillQualityChecker:
if mentioned_refs == 0:
self.report.add_warning(
"content", "Reference files exist but none are mentioned in SKILL.md", "SKILL.md"
"content",
"Reference files exist but none are mentioned in SKILL.md",
"SKILL.md",
)
def _check_links(self):
@@ -295,7 +317,9 @@ class SkillQualityChecker:
if links:
internal_links = [l for t, l in links if not l.startswith("http")]
if internal_links:
self.report.add_info("links", f"✓ All {len(internal_links)} internal links are valid", "SKILL.md")
self.report.add_info(
"links", f"✓ All {len(internal_links)} internal links are valid", "SKILL.md"
)
def _check_skill_completeness(self):
"""Check skill completeness based on best practices.
@@ -316,9 +340,13 @@ class SkillQualityChecker:
r"requirements?:",
r"make\s+sure\s+you\s+have",
]
has_grounding = any(re.search(pattern, content, re.IGNORECASE) for pattern in grounding_patterns)
has_grounding = any(
re.search(pattern, content, re.IGNORECASE) for pattern in grounding_patterns
)
if has_grounding:
self.report.add_info("completeness", "✓ Found verification/prerequisites section", "SKILL.md")
self.report.add_info(
"completeness", "✓ Found verification/prerequisites section", "SKILL.md"
)
else:
self.report.add_info(
"completeness",
@@ -334,12 +362,18 @@ class SkillQualityChecker:
r"error\s+handling",
r"when\s+things\s+go\s+wrong",
]
has_error_handling = any(re.search(pattern, content, re.IGNORECASE) for pattern in error_patterns)
has_error_handling = any(
re.search(pattern, content, re.IGNORECASE) for pattern in error_patterns
)
if has_error_handling:
self.report.add_info("completeness", "✓ Found error handling/troubleshooting guidance", "SKILL.md")
self.report.add_info(
"completeness", "✓ Found error handling/troubleshooting guidance", "SKILL.md"
)
else:
self.report.add_info(
"completeness", "Consider adding troubleshooting section for common issues", "SKILL.md"
"completeness",
"Consider adding troubleshooting section for common issues",
"SKILL.md",
)
# Check for workflow steps (numbered or sequential indicators)
@@ -351,10 +385,14 @@ class SkillQualityChecker:
r"finally,?\s+",
r"next,?\s+",
]
steps_found = sum(1 for pattern in step_patterns if re.search(pattern, content, re.IGNORECASE))
steps_found = sum(
1 for pattern in step_patterns if re.search(pattern, content, re.IGNORECASE)
)
if steps_found >= 3:
self.report.add_info(
"completeness", f"✓ Found clear workflow indicators ({steps_found} step markers)", "SKILL.md"
"completeness",
f"✓ Found clear workflow indicators ({steps_found} step markers)",
"SKILL.md",
)
elif steps_found > 0:
self.report.add_info(
@@ -451,7 +489,9 @@ Examples:
parser.add_argument("--verbose", "-v", action="store_true", help="Show all info messages")
parser.add_argument("--strict", action="store_true", help="Exit with error code if any warnings or errors found")
parser.add_argument(
"--strict", action="store_true", help="Exit with error code if any warnings or errors found"
)
args = parser.parse_args()

View File

@@ -179,7 +179,12 @@ class RateLimitHandler:
reset_time = datetime.fromtimestamp(reset_timestamp) if reset_timestamp else None
return {"limit": limit, "remaining": remaining, "reset_timestamp": reset_timestamp, "reset_time": reset_time}
return {
"limit": limit,
"remaining": remaining,
"reset_timestamp": reset_timestamp,
"reset_time": reset_time,
}
def get_rate_limit_info(self) -> dict[str, Any]:
"""

View File

@@ -136,7 +136,9 @@ def print_summary(result):
# Category breakdown
if hasattr(result, "test_results"):
print(f"\n{ColoredTextTestResult.BOLD}Test Breakdown by Category:{ColoredTextTestResult.RESET}")
print(
f"\n{ColoredTextTestResult.BOLD}Test Breakdown by Category:{ColoredTextTestResult.RESET}"
)
categories = {}
for status, test in result.test_results:
@@ -164,11 +166,16 @@ def main():
import argparse
parser = argparse.ArgumentParser(
description="Run tests for Skill Seeker", formatter_class=argparse.RawDescriptionHelpFormatter
description="Run tests for Skill Seeker",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--suite", "-s", type=str, help="Run specific test suite (config, features, integration)")
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output (show each test)")
parser.add_argument(
"--suite", "-s", type=str, help="Run specific test suite (config, features, integration)"
)
parser.add_argument(
"--verbose", "-v", action="store_true", help="Verbose output (show each test)"
)
parser.add_argument("--quiet", "-q", action="store_true", help="Quiet output (minimal output)")
parser.add_argument("--failfast", "-f", action="store_true", help="Stop on first failure")
parser.add_argument("--list", "-l", action="store_true", help="List all available tests")
@@ -188,7 +195,9 @@ def main():
# Discover or load specific suite
if args.suite:
print(f"Running test suite: {ColoredTextTestResult.BLUE}{args.suite}{ColoredTextTestResult.RESET}\n")
print(
f"Running test suite: {ColoredTextTestResult.BLUE}{args.suite}{ColoredTextTestResult.RESET}\n"
)
suite = run_specific_suite(args.suite)
if suite is None:
return 1

View File

@@ -50,7 +50,9 @@ class ConfigSplitter:
print(" Single source unified config - no splitting needed")
return "none"
else:
print(f" Multi-source unified config ({num_sources} sources) - source split recommended")
print(
f" Multi-source unified config ({num_sources} sources) - source split recommended"
)
return "source"
# For unified configs, only 'source' and 'none' strategies are valid
elif self.strategy in ["source", "none"]:
@@ -77,7 +79,9 @@ class ConfigSplitter:
print(f" Medium documentation ({max_pages} pages) - category split recommended")
return "category"
elif "categories" in self.config and len(self.config["categories"]) >= 3:
print(f" Large documentation ({max_pages} pages) - router + categories recommended")
print(
f" Large documentation ({max_pages} pages) - router + categories recommended"
)
return "router"
else:
print(f" Large documentation ({max_pages} pages) - size-based split")
@@ -227,7 +231,9 @@ class ConfigSplitter:
"max_pages": 500, # Router only needs overview pages
"_router": True,
"_sub_skills": [cfg["name"] for cfg in sub_configs],
"_routing_keywords": {cfg["name"]: list(cfg.get("categories", {}).keys()) for cfg in sub_configs},
"_routing_keywords": {
cfg["name"]: list(cfg.get("categories", {}).keys()) for cfg in sub_configs
},
}
return router_config
@@ -333,11 +339,17 @@ Config Types:
help="Splitting strategy (default: auto)",
)
parser.add_argument("--target-pages", type=int, default=5000, help="Target pages per skill (default: 5000)")
parser.add_argument(
"--target-pages", type=int, default=5000, help="Target pages per skill (default: 5000)"
)
parser.add_argument("--output-dir", help="Output directory for configs (default: same as input)")
parser.add_argument(
"--output-dir", help="Output directory for configs (default: same as input)"
)
parser.add_argument("--dry-run", action="store_true", help="Show what would be created without saving files")
parser.add_argument(
"--dry-run", action="store_true", help="Show what would be created without saving files"
)
args = parser.parse_args()

View File

@@ -538,9 +538,13 @@ def _validate_patterns(patterns: dict[str, list[tuple[str, int]]]) -> None:
raise ValueError(f"Pattern {i} for '{lang}' is not a (regex, weight) tuple: {item}")
pattern, weight = item
if not isinstance(pattern, str):
raise ValueError(f"Pattern {i} for '{lang}': regex must be a string, got {type(pattern).__name__}")
raise ValueError(
f"Pattern {i} for '{lang}': regex must be a string, got {type(pattern).__name__}"
)
if not isinstance(weight, int) or weight < 1 or weight > 5:
raise ValueError(f"Pattern {i} for '{lang}': weight must be int 1-5, got {weight!r}")
raise ValueError(
f"Pattern {i} for '{lang}': weight must be int 1-5, got {weight!r}"
)
# Validate patterns at module load time

View File

@@ -251,7 +251,9 @@ class PythonTestAnalyzer:
# Process each test method
for node in class_node.body:
if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
examples.extend(self._analyze_test_body(node, file_path, imports, setup_code=setup_code))
examples.extend(
self._analyze_test_body(node, file_path, imports, setup_code=setup_code)
)
return examples
@@ -283,7 +285,11 @@ class PythonTestAnalyzer:
return None
def _analyze_test_body(
self, func_node: ast.FunctionDef, file_path: str, imports: list[str], setup_code: str | None = None
self,
func_node: ast.FunctionDef,
file_path: str,
imports: list[str],
setup_code: str | None = None,
) -> list[TestExample]:
"""Analyze test function body for extractable patterns"""
examples = []
@@ -297,7 +303,9 @@ class PythonTestAnalyzer:
# Extract different pattern categories
# 1. Instantiation patterns
instantiations = self._find_instantiations(func_node, file_path, docstring, setup_code, tags, imports)
instantiations = self._find_instantiations(
func_node, file_path, docstring, setup_code, tags, imports
)
examples.extend(instantiations)
# 2. Method calls with assertions
@@ -307,7 +315,9 @@ class PythonTestAnalyzer:
examples.extend(method_calls)
# 3. Configuration dictionaries
configs = self._find_config_dicts(func_node, file_path, docstring, setup_code, tags, imports)
configs = self._find_config_dicts(
func_node, file_path, docstring, setup_code, tags, imports
)
examples.extend(configs)
# 4. Multi-step workflows (integration tests)
@@ -707,7 +717,13 @@ class GenericTestAnalyzer:
return examples
def _create_example(
self, test_name: str, category: str, code: str, language: str, file_path: str, line_number: int
self,
test_name: str,
category: str,
code: str,
language: str,
file_path: str,
line_number: int,
) -> TestExample:
"""Create TestExample from regex match"""
return TestExample(
@@ -891,7 +907,9 @@ class TestExampleExtractor:
# Limit per file
if len(filtered_examples) > self.max_per_file:
# Sort by confidence and take top N
filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[: self.max_per_file]
filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[
: self.max_per_file
]
logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}")
@@ -915,7 +933,10 @@ class TestExampleExtractor:
return self.LANGUAGE_MAP.get(suffix, "Unknown")
def _create_report(
self, examples: list[TestExample], file_path: str | None = None, directory: str | None = None
self,
examples: list[TestExample],
file_path: str | None = None,
directory: str | None = None,
) -> ExampleReport:
"""Create summary report from examples"""
# Enhance examples with AI analysis (C3.6)
@@ -932,15 +953,21 @@ class TestExampleExtractor:
# Count by category
examples_by_category = {}
for example in examples:
examples_by_category[example.category] = examples_by_category.get(example.category, 0) + 1
examples_by_category[example.category] = (
examples_by_category.get(example.category, 0) + 1
)
# Count by language
examples_by_language = {}
for example in examples:
examples_by_language[example.language] = examples_by_language.get(example.language, 0) + 1
examples_by_language[example.language] = (
examples_by_language.get(example.language, 0) + 1
)
# Calculate averages
avg_complexity = sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
avg_complexity = (
sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
)
high_value_count = sum(1 for ex in examples if ex.confidence > 0.7)
return ExampleReport(
@@ -983,15 +1010,25 @@ Examples:
parser.add_argument("directory", nargs="?", help="Directory containing test files")
parser.add_argument("--file", help="Single test file to analyze")
parser.add_argument("--language", help="Filter by programming language (python, javascript, etc.)")
parser.add_argument(
"--min-confidence", type=float, default=0.5, help="Minimum confidence threshold (0.0-1.0, default: 0.5)"
"--language", help="Filter by programming language (python, javascript, etc.)"
)
parser.add_argument(
"--min-confidence",
type=float,
default=0.5,
help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
)
parser.add_argument(
"--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)"
)
parser.add_argument("--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)")
parser.add_argument("--json", action="store_true", help="Output JSON format")
parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
parser.add_argument(
"--recursive", action="store_true", default=True, help="Search directory recursively (default: True)"
"--recursive",
action="store_true",
default=True,
help="Search directory recursively (default: True)",
)
args = parser.parse_args()

View File

@@ -21,7 +21,12 @@ def test_validate_existing_unified_configs():
"""Test that all existing unified configs are valid"""
configs_dir = Path(__file__).parent.parent / "configs"
unified_configs = ["godot_unified.json", "react_unified.json", "django_unified.json", "fastapi_unified.json"]
unified_configs = [
"godot_unified.json",
"react_unified.json",
"django_unified.json",
"fastapi_unified.json",
]
for config_name in unified_configs:
config_path = configs_dir / config_name
@@ -56,8 +61,18 @@ def test_create_temp_unified_config():
"description": "Test unified config",
"merge_mode": "rule-based",
"sources": [
{"type": "documentation", "base_url": "https://example.com/docs", "extract_api": True, "max_pages": 50},
{"type": "github", "repo": "test/repo", "include_code": True, "code_analysis_depth": "surface"},
{
"type": "documentation",
"base_url": "https://example.com/docs",
"extract_api": True,
"max_pages": 50,
},
{
"type": "github",
"repo": "test/repo",
"include_code": True,
"code_analysis_depth": "surface",
},
],
}

View File

@@ -69,7 +69,11 @@ class UnifiedCodebaseAnalyzer:
self.github_token = github_token or os.getenv("GITHUB_TOKEN")
def analyze(
self, source: str, depth: str = "c3x", fetch_github_metadata: bool = True, output_dir: Path | None = None
self,
source: str,
depth: str = "c3x",
fetch_github_metadata: bool = True,
output_dir: Path | None = None,
) -> AnalysisResult:
"""
Analyze codebase with specified depth.
@@ -123,7 +127,9 @@ class UnifiedCodebaseAnalyzer:
raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
# Build result with all streams
result = AnalysisResult(code_analysis=code_analysis, source_type="github", analysis_depth=depth)
result = AnalysisResult(
code_analysis=code_analysis, source_type="github", analysis_depth=depth
)
# Add GitHub-specific data if available
if fetch_metadata:
@@ -168,7 +174,9 @@ class UnifiedCodebaseAnalyzer:
else:
raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
return AnalysisResult(code_analysis=code_analysis, source_type="local", analysis_depth=depth)
return AnalysisResult(
code_analysis=code_analysis, source_type="local", analysis_depth=depth
)
def basic_analysis(self, directory: Path) -> dict:
"""
@@ -423,7 +431,9 @@ class UnifiedCodebaseAnalyzer:
# Only include immediate subdirectories
structure["children"].append({"name": item.name, "type": "directory"})
elif item.is_file():
structure["children"].append({"name": item.name, "type": "file", "extension": item.suffix})
structure["children"].append(
{"name": item.name, "type": "file", "extension": item.suffix}
)
except Exception:
pass

View File

@@ -406,7 +406,13 @@ class UnifiedScraper:
# Append to list instead of overwriting (multi-source support)
self.scraped_data["github"].append(
{"repo": repo, "repo_id": repo_id, "idx": idx, "data": github_data, "data_file": github_data_file}
{
"repo": repo,
"repo_id": repo_id,
"idx": idx,
"data": github_data,
"data_file": github_data_file,
}
)
# Build standalone SKILL.md for synthesis using GitHubToSkillConverter
@@ -433,7 +439,9 @@ class UnifiedScraper:
logger.info(f"📦 Moved GitHub output to cache: {cache_github_dir}")
if os.path.exists(github_data_file_path):
cache_github_data = os.path.join(self.data_dir, f"{github_config['name']}_github_data.json")
cache_github_data = os.path.join(
self.data_dir, f"{github_config['name']}_github_data.json"
)
if os.path.exists(cache_github_data):
os.remove(cache_github_data)
shutil.move(github_data_file_path, cache_github_data)
@@ -478,7 +486,13 @@ class UnifiedScraper:
# Append to list instead of overwriting
self.scraped_data["pdf"].append(
{"pdf_path": pdf_path, "pdf_id": pdf_id, "idx": idx, "data": pdf_data, "data_file": pdf_data_file}
{
"pdf_path": pdf_path,
"pdf_id": pdf_id,
"idx": idx,
"data": pdf_data,
"data_file": pdf_data_file,
}
)
# Build standalone SKILL.md for synthesis
@@ -611,12 +625,20 @@ class UnifiedScraper:
# Load C3.x outputs into memory
c3_data = {
"patterns": self._load_json(temp_output / "patterns" / "detected_patterns.json"),
"test_examples": self._load_json(temp_output / "test_examples" / "test_examples.json"),
"test_examples": self._load_json(
temp_output / "test_examples" / "test_examples.json"
),
"how_to_guides": self._load_guide_collection(temp_output / "tutorials"),
"config_patterns": self._load_json(temp_output / "config_patterns" / "config_patterns.json"),
"architecture": self._load_json(temp_output / "architecture" / "architectural_patterns.json"),
"config_patterns": self._load_json(
temp_output / "config_patterns" / "config_patterns.json"
),
"architecture": self._load_json(
temp_output / "architecture" / "architectural_patterns.json"
),
"api_reference": self._load_api_reference(temp_output / "api_reference"), # C2.5
"dependency_graph": self._load_json(temp_output / "dependencies" / "dependency_graph.json"), # C2.6
"dependency_graph": self._load_json(
temp_output / "dependencies" / "dependency_graph.json"
), # C2.6
}
# Log summary
@@ -769,7 +791,9 @@ class UnifiedScraper:
conflicts = conflicts_data.get("conflicts", [])
# Build skill
builder = UnifiedSkillBuilder(self.config, self.scraped_data, merged_data, conflicts, cache_dir=self.cache_dir)
builder = UnifiedSkillBuilder(
self.config, self.scraped_data, merged_data, conflicts, cache_dir=self.cache_dir
)
builder.build()
@@ -836,7 +860,10 @@ Examples:
parser.add_argument("--config", "-c", required=True, help="Path to unified config JSON file")
parser.add_argument(
"--merge-mode", "-m", choices=["rule-based", "claude-enhanced"], help="Override config merge mode"
"--merge-mode",
"-m",
choices=["rule-based", "claude-enhanced"],
help="Override config merge mode",
)
parser.add_argument(
"--skip-codebase-analysis",
@@ -854,7 +881,9 @@ Examples:
for source in scraper.config.get("sources", []):
if source["type"] == "github":
source["enable_codebase_analysis"] = False
logger.info(f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}")
logger.info(
f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}"
)
# Run scraper
scraper.run()

View File

@@ -97,7 +97,9 @@ class UnifiedSkillBuilder:
if docs_skill_path.exists():
try:
skill_mds["documentation"] = docs_skill_path.read_text(encoding="utf-8")
logger.debug(f"Loaded documentation SKILL.md ({len(skill_mds['documentation'])} chars)")
logger.debug(
f"Loaded documentation SKILL.md ({len(skill_mds['documentation'])} chars)"
)
except OSError as e:
logger.warning(f"Failed to read documentation SKILL.md: {e}")
@@ -109,7 +111,9 @@ class UnifiedSkillBuilder:
try:
content = github_skill_path.read_text(encoding="utf-8")
github_sources.append(content)
logger.debug(f"Loaded GitHub SKILL.md from {github_dir.name} ({len(content)} chars)")
logger.debug(
f"Loaded GitHub SKILL.md from {github_dir.name} ({len(content)} chars)"
)
except OSError as e:
logger.warning(f"Failed to read GitHub SKILL.md from {github_dir.name}: {e}")
@@ -165,7 +169,23 @@ class UnifiedSkillBuilder:
current_section = line[3:].strip()
# Remove emoji and markdown formatting
current_section = current_section.split("](")[0] # Remove links
for emoji in ["📚", "🏗️", "⚠️", "🔧", "📖", "💡", "🎯", "📊", "🔍", "⚙️", "🧪", "📝", "🗂️", "📐", ""]:
for emoji in [
"📚",
"🏗️",
"⚠️",
"🔧",
"📖",
"💡",
"🎯",
"📊",
"🔍",
"⚙️",
"🧪",
"📝",
"🗂️",
"📐",
"",
]:
current_section = current_section.replace(emoji, "").strip()
current_content = []
elif current_section:
@@ -268,7 +288,9 @@ This skill synthesizes knowledge from multiple sources:
if "Quick Reference" in github_sections:
# Include GitHub's Quick Reference (contains design patterns summary)
logger.info(f"DEBUG: Including GitHub Quick Reference ({len(github_sections['Quick Reference'])} chars)")
logger.info(
f"DEBUG: Including GitHub Quick Reference ({len(github_sections['Quick Reference'])} chars)"
)
content += github_sections["Quick Reference"] + "\n\n"
else:
logger.warning("DEBUG: GitHub Quick Reference section NOT FOUND!")
@@ -330,7 +352,9 @@ This skill synthesizes knowledge from multiple sources:
# Footer
content += "---\n\n"
content += "*Synthesized from official documentation and codebase analysis by Skill Seekers*\n"
content += (
"*Synthesized from official documentation and codebase analysis by Skill Seekers*\n"
)
return content
@@ -602,7 +626,9 @@ This skill combines knowledge from multiple sources:
# Count by type
by_type = {}
for conflict in self.conflicts:
ctype = conflict.type if hasattr(conflict, "type") else conflict.get("type", "unknown")
ctype = (
conflict.type if hasattr(conflict, "type") else conflict.get("type", "unknown")
)
by_type[ctype] = by_type.get(ctype, 0) + 1
content += "**Conflict Breakdown:**\n"
@@ -836,7 +862,9 @@ This skill combines knowledge from multiple sources:
source_id = doc_source.get("source_id", "unknown")
base_url = doc_source.get("base_url", "Unknown")
total_pages = doc_source.get("total_pages", "N/A")
f.write(f"- [{source_id}]({source_id}/index.md) - {base_url} ({total_pages} pages)\n")
f.write(
f"- [{source_id}]({source_id}/index.md) - {base_url} ({total_pages} pages)\n"
)
logger.info(f"Created documentation references ({len(docs_list)} sources)")
@@ -1084,9 +1112,13 @@ This skill combines knowledge from multiple sources:
pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1
if pattern_summary:
for ptype, count in sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True):
for ptype, count in sorted(
pattern_summary.items(), key=lambda x: x[1], reverse=True
):
f.write(f"- **{ptype}**: {count} instance(s)\n")
f.write("\n📁 See `references/codebase_analysis/patterns/` for detailed analysis.\n\n")
f.write(
"\n📁 See `references/codebase_analysis/patterns/` for detailed analysis.\n\n"
)
else:
f.write("*No design patterns detected.*\n\n")
@@ -1115,7 +1147,9 @@ This skill combines knowledge from multiple sources:
f.write("\n**Recommended Actions**:\n")
for action in insights["recommended_actions"][:5]:
f.write(f"- {action}\n")
f.write("\n📁 See `references/codebase_analysis/configuration/` for details.\n\n")
f.write(
"\n📁 See `references/codebase_analysis/configuration/` for details.\n\n"
)
else:
f.write("*No configuration files detected.*\n\n")
@@ -1128,7 +1162,9 @@ This skill combines knowledge from multiple sources:
f.write(f"**{len(guides)} how-to guide(s) extracted from codebase**:\n\n")
for guide in guides[:10]: # Top 10
f.write(f"- {guide.get('title', 'Untitled Guide')}\n")
f.write("\n📁 See `references/codebase_analysis/guides/` for detailed tutorials.\n\n")
f.write(
"\n📁 See `references/codebase_analysis/guides/` for detailed tutorials.\n\n"
)
else:
f.write("*No workflow guides extracted.*\n\n")
@@ -1147,11 +1183,15 @@ This skill combines knowledge from multiple sources:
if examples.get("examples_by_category"):
f.write("\n**By Category**:\n")
for cat, count in sorted(
examples["examples_by_category"].items(), key=lambda x: x[1], reverse=True
examples["examples_by_category"].items(),
key=lambda x: x[1],
reverse=True,
):
f.write(f"- {cat}: {count}\n")
f.write("\n📁 See `references/codebase_analysis/examples/` for code samples.\n\n")
f.write(
"\n📁 See `references/codebase_analysis/examples/` for code samples.\n\n"
)
else:
f.write("*No test examples extracted.*\n\n")
@@ -1163,13 +1203,17 @@ This skill combines knowledge from multiple sources:
dir_struct = c3_data["architecture"].get("directory_structure", {})
if dir_struct:
f.write("**Main Directories**:\n")
for dir_name, file_count in sorted(dir_struct.items(), key=lambda x: x[1], reverse=True)[:15]:
for dir_name, file_count in sorted(
dir_struct.items(), key=lambda x: x[1], reverse=True
)[:15]:
f.write(f"- `{dir_name}/`: {file_count} file(s)\n")
f.write("\n")
# Footer
f.write("---\n\n")
f.write("*This architecture overview was automatically generated by C3.x codebase analysis.*\n")
f.write(
"*This architecture overview was automatically generated by C3.x codebase analysis.*\n"
)
f.write("*Last updated: skill build time*\n")
logger.info("📐 Created ARCHITECTURE.md")
@@ -1277,7 +1321,9 @@ This skill combines knowledge from multiple sources:
if guides:
f.write("## Available Guides\n\n")
for guide in guides:
f.write(f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n")
f.write(
f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n"
)
f.write("\n")
# Save individual guide markdown files
@@ -1351,7 +1397,9 @@ This skill combines knowledge from multiple sources:
if insights:
f.write("## Overall Insights\n\n")
if insights.get("security_issues_found"):
f.write(f"🔐 **Security Issues**: {insights['security_issues_found']}\n\n")
f.write(
f"🔐 **Security Issues**: {insights['security_issues_found']}\n\n"
)
if insights.get("recommended_actions"):
f.write("**Recommended Actions**:\n")
for action in insights["recommended_actions"]:
@@ -1425,7 +1473,9 @@ This skill combines knowledge from multiple sources:
top_patterns = sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True)[:3]
if top_patterns:
content += f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n"
content += (
f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n"
)
content += "\n"
# Add test examples summary
@@ -1449,7 +1499,9 @@ This skill combines knowledge from multiple sources:
# Add security warning if present
if c3_data["config_patterns"].get("ai_enhancements"):
insights = c3_data["config_patterns"]["ai_enhancements"].get("overall_insights", {})
insights = c3_data["config_patterns"]["ai_enhancements"].get(
"overall_insights", {}
)
security_issues = insights.get("security_issues_found", 0)
if security_issues > 0:
content += f"- 🔐 **Security Alert**: {security_issues} issue(s) detected\n"
@@ -1477,7 +1529,8 @@ This skill combines knowledge from multiple sources:
medium = [
c
for c in self.conflicts
if (hasattr(c, "severity") and c.severity == "medium") or c.get("severity") == "medium"
if (hasattr(c, "severity") and c.severity == "medium")
or c.get("severity") == "medium"
]
low = [
c
@@ -1497,9 +1550,15 @@ This skill combines knowledge from multiple sources:
for conflict in high:
api_name = (
conflict.api_name if hasattr(conflict, "api_name") else conflict.get("api_name", "Unknown")
conflict.api_name
if hasattr(conflict, "api_name")
else conflict.get("api_name", "Unknown")
)
diff = (
conflict.difference
if hasattr(conflict, "difference")
else conflict.get("difference", "N/A")
)
diff = conflict.difference if hasattr(conflict, "difference") else conflict.get("difference", "N/A")
f.write(f"### {api_name}\n\n")
f.write(f"**Issue**: {diff}\n\n")
@@ -1510,9 +1569,15 @@ This skill combines knowledge from multiple sources:
for conflict in medium[:20]: # Limit to 20
api_name = (
conflict.api_name if hasattr(conflict, "api_name") else conflict.get("api_name", "Unknown")
conflict.api_name
if hasattr(conflict, "api_name")
else conflict.get("api_name", "Unknown")
)
diff = (
conflict.difference
if hasattr(conflict, "difference")
else conflict.get("difference", "N/A")
)
diff = conflict.difference if hasattr(conflict, "difference") else conflict.get("difference", "N/A")
f.write(f"### {api_name}\n\n")
f.write(f"{diff}\n\n")
@@ -1534,7 +1599,9 @@ if __name__ == "__main__":
config = json.load(f)
# Mock scraped data
scraped_data = {"github": {"data": {"readme": "# Test Repository", "issues": [], "releases": []}}}
scraped_data = {
"github": {"data": {"readme": "# Test Repository", "issues": [], "releases": []}}
}
builder = UnifiedSkillBuilder(config, scraped_data)
builder.build()

View File

@@ -179,7 +179,9 @@ def validate_zip_file(zip_path: str | Path) -> tuple[bool, str | None]:
return True, None
def read_reference_files(skill_dir: str | Path, max_chars: int = 100000, preview_limit: int = 40000) -> dict[str, dict]:
def read_reference_files(
skill_dir: str | Path, max_chars: int = 100000, preview_limit: int = 40000
) -> dict[str, dict]:
"""Read reference files from a skill directory with enriched metadata.
This function reads markdown files from the references/ subdirectory
@@ -319,7 +321,10 @@ def read_reference_files(skill_dir: str | Path, max_chars: int = 100000, preview
def retry_with_backoff(
operation: Callable[[], T], max_attempts: int = 3, base_delay: float = 1.0, operation_name: str = "operation"
operation: Callable[[], T],
max_attempts: int = 3,
base_delay: float = 1.0,
operation_name: str = "operation",
) -> T:
"""Retry an operation with exponential backoff.
@@ -355,7 +360,12 @@ def retry_with_backoff(
if attempt < max_attempts:
delay = base_delay * (2 ** (attempt - 1))
logger.warning(
"%s failed (attempt %d/%d), retrying in %.1fs: %s", operation_name, attempt, max_attempts, delay, e
"%s failed (attempt %d/%d), retrying in %.1fs: %s",
operation_name,
attempt,
max_attempts,
delay,
e,
)
time.sleep(delay)
else:
@@ -368,7 +378,10 @@ def retry_with_backoff(
async def retry_with_backoff_async(
operation: Callable[[], T], max_attempts: int = 3, base_delay: float = 1.0, operation_name: str = "operation"
operation: Callable[[], T],
max_attempts: int = 3,
base_delay: float = 1.0,
operation_name: str = "operation",
) -> T:
"""Async version of retry_with_backoff for async operations.
@@ -403,7 +416,12 @@ async def retry_with_backoff_async(
if attempt < max_attempts:
delay = base_delay * (2 ** (attempt - 1))
logger.warning(
"%s failed (attempt %d/%d), retrying in %.1fs: %s", operation_name, attempt, max_attempts, delay, e
"%s failed (attempt %d/%d), retrying in %.1fs: %s",
operation_name,
attempt,
max_attempts,
delay,
e,
)
await asyncio.sleep(delay)
else:

View File

@@ -138,7 +138,9 @@ class AgentDetector:
return None
return self.AGENT_CONFIG[agent_id]["transport"]
def generate_config(self, agent_id: str, server_command: str, http_port: int | None = 3000) -> str | None:
def generate_config(
self, agent_id: str, server_command: str, http_port: int | None = 3000
) -> str | None:
"""
Generate MCP configuration for a specific agent.
@@ -282,7 +284,9 @@ def detect_agents() -> list[dict[str, str]]:
return detector.detect_agents()
def generate_config(agent_name: str, server_command: str = "skill-seekers mcp", http_port: int = 3000) -> str | None:
def generate_config(
agent_name: str, server_command: str = "skill-seekers mcp", http_port: int = 3000
) -> str | None:
"""
Convenience function to generate config for a specific agent.

View File

@@ -118,7 +118,8 @@ class GitConfigRepo:
) from e
elif "not found" in error_msg.lower() or "404" in error_msg:
raise GitCommandError(
f"Repository not found: {git_url}. Verify the URL is correct and you have access.", 128
f"Repository not found: {git_url}. Verify the URL is correct and you have access.",
128,
) from e
else:
raise GitCommandError(f"Failed to clone repository: {error_msg}", 128) from e

View File

@@ -139,14 +139,20 @@ try:
inputSchema={"type": "object", "properties": {}},
),
Tool(
name="scrape_docs", description="Scrape documentation", inputSchema={"type": "object", "properties": {}}
name="scrape_docs",
description="Scrape documentation",
inputSchema={"type": "object", "properties": {}},
),
Tool(
name="scrape_github",
description="Scrape GitHub repository",
inputSchema={"type": "object", "properties": {}},
),
Tool(name="scrape_pdf", description="Scrape PDF file", inputSchema={"type": "object", "properties": {}}),
Tool(
name="scrape_pdf",
description="Scrape PDF file",
inputSchema={"type": "object", "properties": {}},
),
Tool(
name="package_skill",
description="Package skill into .zip",
@@ -157,9 +163,15 @@ try:
description="Upload skill to Claude",
inputSchema={"type": "object", "properties": {}},
),
Tool(name="install_skill", description="Install skill", inputSchema={"type": "object", "properties": {}}),
Tool(
name="split_config", description="Split large config", inputSchema={"type": "object", "properties": {}}
name="install_skill",
description="Install skill",
inputSchema={"type": "object", "properties": {}},
),
Tool(
name="split_config",
description="Split large config",
inputSchema={"type": "object", "properties": {}},
),
Tool(
name="generate_router",

View File

@@ -726,7 +726,13 @@ async def estimate_pages_tool(args: dict) -> list[TextContent]:
timeout = max(300, max_discovery // 2) # Minimum 5 minutes
# Run estimate_pages.py
cmd = [sys.executable, str(CLI_DIR / "estimate_pages.py"), config_path, "--max-discovery", str(max_discovery)]
cmd = [
sys.executable,
str(CLI_DIR / "estimate_pages.py"),
config_path,
"--max-discovery",
str(max_discovery),
]
progress_msg = "🔄 Estimating page count...\n"
progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
@@ -980,7 +986,9 @@ async def validate_config_tool(args: dict) -> list[TextContent]:
try:
# Check if file exists
if not Path(config_path).exists():
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]
return [
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
]
# Try unified config validator first
try:
@@ -1004,7 +1012,9 @@ async def validate_config_tool(args: dict) -> list[TextContent]:
result += f" Max pages: {source.get('max_pages', 'Not set')}\n"
elif source["type"] == "github":
result += f" Repo: {source.get('repo', 'N/A')}\n"
result += f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
result += (
f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
)
elif source["type"] == "pdf":
result += f" Path: {source.get('path', 'N/A')}\n"
@@ -1106,7 +1116,9 @@ async def generate_router_tool(args: dict) -> list[TextContent]:
config_files = glob.glob(config_pattern)
if not config_files:
return [TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")]
return [
TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")
]
# Run generate_router.py
cmd = [
@@ -1159,7 +1171,11 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
cmd.extend(["--from-json", from_json])
else:
return [TextContent(type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json")]
return [
TextContent(
type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json"
)
]
# Run pdf_scraper.py with streaming (can take a while)
timeout = 600 # 10 minutes for PDF extraction
@@ -1257,7 +1273,12 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
# MODE 1: Named Source (highest priority)
if source_name:
if not config_name:
return [TextContent(type="text", text="❌ Error: config_name is required when using source parameter")]
return [
TextContent(
type="text",
text="❌ Error: config_name is required when using source parameter",
)
]
# Get source from registry
source_manager = SourceManager()
@@ -1278,7 +1299,11 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
git_repo = GitConfigRepo()
try:
repo_path = git_repo.clone_or_pull(
source_name=source_name, git_url=git_url, branch=branch, token=token, force_refresh=force_refresh
source_name=source_name,
git_url=git_url,
branch=branch,
token=token,
force_refresh=force_refresh,
)
except Exception as e:
return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]
@@ -1320,7 +1345,12 @@ Next steps:
# MODE 2: Direct Git URL
elif git_url:
if not config_name:
return [TextContent(type="text", text="❌ Error: config_name is required when using git_url parameter")]
return [
TextContent(
type="text",
text="❌ Error: config_name is required when using git_url parameter",
)
]
# Clone/pull repository
git_repo = GitConfigRepo()
@@ -1418,7 +1448,9 @@ Next steps:
if tags:
result += f" Tags: {tags}\n"
result += "\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
result += (
"\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
)
result += f"📚 API Docs: {API_BASE_URL}/docs\n"
return [TextContent(type="text", text=result)]
@@ -1426,7 +1458,10 @@ Next steps:
# Download specific config
if not config_name:
return [
TextContent(type="text", text="❌ Error: Please provide config_name or set list_available=true")
TextContent(
type="text",
text="❌ Error: Please provide config_name or set list_available=true",
)
]
# Get config details first
@@ -1486,11 +1521,14 @@ Next steps:
except httpx.HTTPError as e:
return [
TextContent(
type="text", text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later."
type="text",
text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later.",
)
]
except json.JSONDecodeError as e:
return [TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")]
return [
TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")
]
except Exception as e:
return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
@@ -1575,7 +1613,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
if not dry_run:
# Call fetch_config_tool directly
fetch_result = await fetch_config_tool({"config_name": config_name, "destination": destination})
fetch_result = await fetch_config_tool(
{"config_name": config_name, "destination": destination}
)
# Parse result to extract config path
fetch_output = fetch_result[0].text
@@ -1589,7 +1629,12 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
workflow_state["config_path"] = match.group(1).strip()
output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}")
else:
return [TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Failed to fetch config")]
return [
TextContent(
type="text",
text="\n".join(output_lines) + "\n\n❌ Failed to fetch config",
)
]
workflow_state["phases_completed"].append("fetch_config")
else:
@@ -1614,7 +1659,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
workflow_state["skill_name"] = config.get("name", "unknown")
except Exception as e:
return [
TextContent(type="text", text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}")
TextContent(
type="text",
text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}",
)
]
# Call scrape_docs_tool (does NOT include enhancement)
@@ -1638,7 +1686,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
# Check for success
if "" in scrape_output:
return [
TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above")
TextContent(
type="text",
text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above",
)
]
workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}"
@@ -1738,7 +1789,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
if not dry_run:
if has_api_key:
# Call upload_skill_tool
upload_result = await upload_skill_tool({"skill_zip": workflow_state["zip_path"]})
upload_result = await upload_skill_tool(
{"skill_zip": workflow_state["zip_path"]}
)
upload_output = upload_result[0].text
output_lines.append(upload_output)
@@ -1813,7 +1866,10 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
from github import Github, GithubException
except ImportError:
return [
TextContent(type="text", text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub")
TextContent(
type="text",
text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub",
)
]
config_path = args.get("config_path")
@@ -1826,7 +1882,9 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
if config_path:
config_file = Path(config_path)
if not config_file.exists():
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]
return [
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
]
with open(config_file) as f:
config_data = json.load(f)
@@ -1841,7 +1899,11 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")]
else:
return [TextContent(type="text", text="❌ Error: Must provide either config_path or config_json")]
return [
TextContent(
type="text", text="❌ Error: Must provide either config_path or config_json"
)
]
# Use ConfigValidator for comprehensive validation
if ConfigValidator is None:
@@ -1871,14 +1933,20 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
if not is_unified:
# Legacy config - check base_url
base_url = config_data.get("base_url", "")
if base_url and not (base_url.startswith("http://") or base_url.startswith("https://")):
raise ValueError(f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://")
if base_url and not (
base_url.startswith("http://") or base_url.startswith("https://")
):
raise ValueError(
f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://"
)
else:
# Unified config - check URLs in sources
for idx, source in enumerate(config_data.get("sources", [])):
if source.get("type") == "documentation":
source_url = source.get("base_url", "")
if source_url and not (source_url.startswith("http://") or source_url.startswith("https://")):
if source_url and not (
source_url.startswith("http://") or source_url.startswith("https://")
):
raise ValueError(
f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://"
)
@@ -1920,7 +1988,10 @@ Please fix these issues and try again.
# For legacy configs, use name-based detection
name_lower = config_name.lower()
category = "other"
if any(x in name_lower for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]):
if any(
x in name_lower
for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]
):
category = "web-frameworks"
elif any(x in name_lower for x in ["godot", "unity", "unreal"]):
category = "game-engines"
@@ -1936,12 +2007,16 @@ Please fix these issues and try again.
if "max_pages" not in config_data:
warnings.append("⚠️ No max_pages set - will use default (100)")
elif config_data.get("max_pages") in (None, -1):
warnings.append("⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours")
warnings.append(
"⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours"
)
else:
# Unified config warnings
for src in config_data.get("sources", []):
if src.get("type") == "documentation" and "max_pages" not in src:
warnings.append("⚠️ No max_pages set for documentation source - will use default (100)")
warnings.append(
"⚠️ No max_pages set for documentation source - will use default (100)"
)
elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1):
warnings.append("⚠️ Unlimited scraping enabled for documentation source")
@@ -1996,7 +2071,9 @@ Please fix these issues and try again.
# Create issue
issue = repo.create_issue(
title=f"[CONFIG] {config_name}", body=issue_body, labels=["config-submission", "needs-review"]
title=f"[CONFIG] {config_name}",
body=issue_body,
labels=["config-submission", "needs-review"],
)
result = f"""✅ Config submitted successfully!

View File

@@ -64,7 +64,9 @@ class SourceManager:
"""
# Validate name
if not name or not name.replace("-", "").replace("_", "").isalnum():
raise ValueError(f"Invalid source name '{name}'. Must be alphanumeric with optional hyphens/underscores.")
raise ValueError(
f"Invalid source name '{name}'. Must be alphanumeric with optional hyphens/underscores."
)
# Validate git_url
if not git_url or not git_url.strip():
@@ -136,7 +138,9 @@ class SourceManager:
# Not found - provide helpful error
available = [s["name"] for s in registry["sources"]]
raise KeyError(f"Source '{name}' not found. Available sources: {', '.join(available) if available else 'none'}")
raise KeyError(
f"Source '{name}' not found. Available sources: {', '.join(available) if available else 'none'}"
)
def list_sources(self, enabled_only: bool = False) -> list[dict]:
"""

View File

@@ -169,7 +169,9 @@ async def validate_config(args: dict) -> list[TextContent]:
try:
# Check if file exists
if not Path(config_path).exists():
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]
return [
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
]
# Try unified config validator first
try:
@@ -193,7 +195,9 @@ async def validate_config(args: dict) -> list[TextContent]:
result += f" Max pages: {source.get('max_pages', 'Not set')}\n"
elif source["type"] == "github":
result += f" Repo: {source.get('repo', 'N/A')}\n"
result += f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
result += (
f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
)
elif source["type"] == "pdf":
result += f" Path: {source.get('path', 'N/A')}\n"

View File

@@ -252,14 +252,18 @@ async def upload_skill_tool(args: dict) -> list[TextContent]:
except ValueError as e:
return [
TextContent(
type="text", text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai"
type="text",
text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai",
)
]
# Check if upload is supported
if target == "markdown":
return [
TextContent(type="text", text="❌ Markdown export does not support upload. Use the packaged file manually.")
TextContent(
type="text",
text="❌ Markdown export does not support upload. Use the packaged file manually.",
)
]
# Run upload_skill.py with target parameter
@@ -323,13 +327,18 @@ async def enhance_skill_tool(args: dict) -> list[TextContent]:
except ValueError as e:
return [
TextContent(
type="text", text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai"
type="text",
text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai",
)
]
# Check if enhancement is supported
if not adaptor.supports_enhancement():
return [TextContent(type="text", text=f"{adaptor.PLATFORM_NAME} does not support AI enhancement")]
return [
TextContent(
type="text", text=f"{adaptor.PLATFORM_NAME} does not support AI enhancement"
)
]
output_lines = []
output_lines.append(f"🚀 Enhancing skill with {adaptor.PLATFORM_NAME}")
@@ -373,12 +382,19 @@ async def enhance_skill_tool(args: dict) -> list[TextContent]:
if not api_key:
return [
TextContent(type="text", text=f"{env_var} not set. Set API key or pass via api_key parameter.")
TextContent(
type="text",
text=f"{env_var} not set. Set API key or pass via api_key parameter.",
)
]
# Validate API key
if not adaptor.validate_api_key(api_key):
return [TextContent(type="text", text=f"❌ Invalid API key format for {adaptor.PLATFORM_NAME}")]
return [
TextContent(
type="text", text=f"❌ Invalid API key format for {adaptor.PLATFORM_NAME}"
)
]
output_lines.append("Calling API for enhancement...")
output_lines.append("")
@@ -447,7 +463,8 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
except ValueError as e:
return [
TextContent(
type="text", text=f"❌ Error: {str(e)}\n\nSupported platforms: claude, gemini, openai, markdown"
type="text",
text=f"❌ Error: {str(e)}\n\nSupported platforms: claude, gemini, openai, markdown",
)
]
@@ -498,7 +515,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
if not dry_run:
# Call fetch_config_tool directly
fetch_result = await fetch_config_tool({"config_name": config_name, "destination": destination})
fetch_result = await fetch_config_tool(
{"config_name": config_name, "destination": destination}
)
# Parse result to extract config path
fetch_output = fetch_result[0].text
@@ -512,7 +531,12 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
workflow_state["config_path"] = match.group(1).strip()
output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}")
else:
return [TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Failed to fetch config")]
return [
TextContent(
type="text",
text="\n".join(output_lines) + "\n\n❌ Failed to fetch config",
)
]
workflow_state["phases_completed"].append("fetch_config")
else:
@@ -537,7 +561,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
workflow_state["skill_name"] = config.get("name", "unknown")
except Exception as e:
return [
TextContent(type="text", text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}")
TextContent(
type="text",
text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}",
)
]
# Call scrape_docs_tool (does NOT include enhancement)
@@ -561,7 +588,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
# Check for success
if "" in scrape_output:
return [
TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above")
TextContent(
type="text",
text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above",
)
]
workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}"
@@ -641,9 +671,13 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
else:
# Fallback: construct package path based on platform
if target == "gemini":
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}-gemini.tar.gz"
workflow_state["zip_path"] = (
f"{destination}/{workflow_state['skill_name']}-gemini.tar.gz"
)
elif target == "openai":
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}-openai.zip"
workflow_state["zip_path"] = (
f"{destination}/{workflow_state['skill_name']}-openai.zip"
)
else:
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}.zip"
@@ -660,7 +694,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
pkg_ext = "zip"
pkg_file = f"{destination}/{workflow_state['skill_name']}.zip"
output_lines.append(f" [DRY RUN] Would package to {pkg_ext} file for {adaptor.PLATFORM_NAME}")
output_lines.append(
f" [DRY RUN] Would package to {pkg_ext} file for {adaptor.PLATFORM_NAME}"
)
workflow_state["zip_path"] = pkg_file
output_lines.append("")
@@ -725,7 +761,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
output_lines.append(" (No API key needed - markdown is export only)")
output_lines.append(f" Package created: {workflow_state['zip_path']}")
else:
output_lines.append(f" [DRY RUN] Would upload to {adaptor.PLATFORM_NAME} (if API key set)")
output_lines.append(
f" [DRY RUN] Would upload to {adaptor.PLATFORM_NAME} (if API key set)"
)
output_lines.append("")
@@ -757,12 +795,16 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
output_lines.append(" Go to https://aistudio.google.com/ to use it")
elif target == "openai":
output_lines.append("🎉 Your assistant is now available in OpenAI!")
output_lines.append(" Go to https://platform.openai.com/assistants/ to use it")
output_lines.append(
" Go to https://platform.openai.com/assistants/ to use it"
)
elif auto_upload:
output_lines.append("📝 Manual upload required (see instructions above)")
else:
output_lines.append("📤 To upload:")
output_lines.append(f" skill-seekers upload {workflow_state['zip_path']} --target {target}")
output_lines.append(
f" skill-seekers upload {workflow_state['zip_path']} --target {target}"
)
else:
output_lines.append("This was a dry run. No actions were taken.")
output_lines.append("")

View File

@@ -140,7 +140,13 @@ async def estimate_pages_tool(args: dict) -> list[TextContent]:
timeout = max(300, max_discovery // 2) # Minimum 5 minutes
# Run estimate_pages.py
cmd = [sys.executable, str(CLI_DIR / "estimate_pages.py"), config_path, "--max-discovery", str(max_discovery)]
cmd = [
sys.executable,
str(CLI_DIR / "estimate_pages.py"),
config_path,
"--max-discovery",
str(max_discovery),
]
progress_msg = "🔄 Estimating page count...\n"
progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
@@ -328,7 +334,11 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
cmd.extend(["--from-json", from_json])
else:
return [TextContent(type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json")]
return [
TextContent(
type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json"
)
]
# Run pdf_scraper.py with streaming (can take a while)
timeout = 600 # 10 minutes for PDF extraction
@@ -529,7 +539,11 @@ async def detect_patterns_tool(args: dict) -> list[TextContent]:
directory = args.get("directory")
if not file_path and not directory:
return [TextContent(type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter")]
return [
TextContent(
type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter"
)
]
output = args.get("output", "")
depth = args.get("depth", "deep")
@@ -604,7 +618,11 @@ async def extract_test_examples_tool(args: dict) -> list[TextContent]:
directory = args.get("directory")
if not file_path and not directory:
return [TextContent(type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter")]
return [
TextContent(
type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter"
)
]
language = args.get("language", "")
min_confidence = args.get("min_confidence", 0.5)
@@ -688,7 +706,12 @@ async def build_how_to_guides_tool(args: dict) -> list[TextContent]:
"""
input_file = args.get("input")
if not input_file:
return [TextContent(type="text", text="❌ Error: input parameter is required (path to test_examples.json)")]
return [
TextContent(
type="text",
text="❌ Error: input parameter is required (path to test_examples.json)",
)
]
output = args.get("output", "output/codebase/tutorials")
group_by = args.get("group_by", "ai-tutorial-group")

View File

@@ -76,7 +76,12 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
# MODE 1: Named Source (highest priority)
if source_name:
if not config_name:
return [TextContent(type="text", text="❌ Error: config_name is required when using source parameter")]
return [
TextContent(
type="text",
text="❌ Error: config_name is required when using source parameter",
)
]
# Get source from registry
source_manager = SourceManager()
@@ -97,7 +102,11 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
git_repo = GitConfigRepo()
try:
repo_path = git_repo.clone_or_pull(
source_name=source_name, git_url=git_url, branch=branch, token=token, force_refresh=force_refresh
source_name=source_name,
git_url=git_url,
branch=branch,
token=token,
force_refresh=force_refresh,
)
except Exception as e:
return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]
@@ -139,7 +148,12 @@ Next steps:
# MODE 2: Direct Git URL
elif git_url:
if not config_name:
return [TextContent(type="text", text="❌ Error: config_name is required when using git_url parameter")]
return [
TextContent(
type="text",
text="❌ Error: config_name is required when using git_url parameter",
)
]
# Clone/pull repository
git_repo = GitConfigRepo()
@@ -237,7 +251,9 @@ Next steps:
if tags:
result += f" Tags: {tags}\n"
result += "\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
result += (
"\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
)
result += f"📚 API Docs: {API_BASE_URL}/docs\n"
return [TextContent(type="text", text=result)]
@@ -245,7 +261,10 @@ Next steps:
# Download specific config
if not config_name:
return [
TextContent(type="text", text="❌ Error: Please provide config_name or set list_available=true")
TextContent(
type="text",
text="❌ Error: Please provide config_name or set list_available=true",
)
]
# Get config details first
@@ -305,11 +324,14 @@ Next steps:
except httpx.HTTPError as e:
return [
TextContent(
type="text", text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later."
type="text",
text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later.",
)
]
except json.JSONDecodeError as e:
return [TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")]
return [
TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")
]
except Exception as e:
return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
@@ -335,7 +357,10 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
from github import Github, GithubException
except ImportError:
return [
TextContent(type="text", text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub")
TextContent(
type="text",
text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub",
)
]
# Import config validator
@@ -359,7 +384,9 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
if config_path:
config_file = Path(config_path)
if not config_file.exists():
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]
return [
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
]
with open(config_file) as f:
config_data = json.load(f)
@@ -374,7 +401,11 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")]
else:
return [TextContent(type="text", text="❌ Error: Must provide either config_path or config_json")]
return [
TextContent(
type="text", text="❌ Error: Must provide either config_path or config_json"
)
]
# Use ConfigValidator for comprehensive validation
if ConfigValidator is None:
@@ -404,14 +435,20 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
if not is_unified:
# Legacy config - check base_url
base_url = config_data.get("base_url", "")
if base_url and not (base_url.startswith("http://") or base_url.startswith("https://")):
raise ValueError(f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://")
if base_url and not (
base_url.startswith("http://") or base_url.startswith("https://")
):
raise ValueError(
f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://"
)
else:
# Unified config - check URLs in sources
for idx, source in enumerate(config_data.get("sources", [])):
if source.get("type") == "documentation":
source_url = source.get("base_url", "")
if source_url and not (source_url.startswith("http://") or source_url.startswith("https://")):
if source_url and not (
source_url.startswith("http://") or source_url.startswith("https://")
):
raise ValueError(
f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://"
)
@@ -453,7 +490,10 @@ Please fix these issues and try again.
# For legacy configs, use name-based detection
name_lower = config_name.lower()
category = "other"
if any(x in name_lower for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]):
if any(
x in name_lower
for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]
):
category = "web-frameworks"
elif any(x in name_lower for x in ["godot", "unity", "unreal"]):
category = "game-engines"
@@ -469,12 +509,16 @@ Please fix these issues and try again.
if "max_pages" not in config_data:
warnings.append("⚠️ No max_pages set - will use default (100)")
elif config_data.get("max_pages") in (None, -1):
warnings.append("⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours")
warnings.append(
"⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours"
)
else:
# Unified config warnings
for src in config_data.get("sources", []):
if src.get("type") == "documentation" and "max_pages" not in src:
warnings.append("⚠️ No max_pages set for documentation source - will use default (100)")
warnings.append(
"⚠️ No max_pages set for documentation source - will use default (100)"
)
elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1):
warnings.append("⚠️ Unlimited scraping enabled for documentation source")
@@ -529,7 +573,9 @@ Please fix these issues and try again.
# Create issue
issue = repo.create_issue(
title=f"[CONFIG] {config_name}", body=issue_body, labels=["config-submission", "needs-review"]
title=f"[CONFIG] {config_name}",
body=issue_body,
labels=["config-submission", "needs-review"],
)
result = f"""✅ Config submitted successfully!

View File

@@ -183,7 +183,9 @@ async def generate_router(args: dict) -> list[TextContent]:
config_files = glob.glob(config_pattern)
if not config_files:
return [TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")]
return [
TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")
]
# Run generate_router.py
cmd = [

View File

@@ -282,7 +282,12 @@ Pass data to components:
def test_e2e_package_format_validation(self):
"""Test that each platform creates correct package format"""
test_cases = [("claude", ".zip"), ("gemini", ".tar.gz"), ("openai", ".zip"), ("markdown", ".zip")]
test_cases = [
("claude", ".zip"),
("gemini", ".tar.gz"),
("openai", ".zip"),
("markdown", ".zip"),
]
for platform, expected_ext in test_cases:
adaptor = get_adaptor(platform)
@@ -290,9 +295,13 @@ Pass data to components:
# Verify extension
if expected_ext == ".tar.gz":
self.assertTrue(str(package_path).endswith(".tar.gz"), f"{platform} should create .tar.gz file")
self.assertTrue(
str(package_path).endswith(".tar.gz"), f"{platform} should create .tar.gz file"
)
else:
self.assertTrue(str(package_path).endswith(".zip"), f"{platform} should create .zip file")
self.assertTrue(
str(package_path).endswith(".zip"), f"{platform} should create .zip file"
)
def test_e2e_package_filename_convention(self):
"""Test that package filenames follow convention"""
@@ -308,7 +317,9 @@ Pass data to components:
package_path = adaptor.package(self.skill_dir, self.output_dir)
# Verify filename
self.assertEqual(package_path.name, expected_name, f"{platform} package filename incorrect")
self.assertEqual(
package_path.name, expected_name, f"{platform} package filename incorrect"
)
def test_e2e_all_platforms_preserve_references(self):
"""Test that all platforms preserve reference files"""
@@ -324,7 +335,8 @@ Pass data to components:
names = tar.getnames()
for ref_file in ref_files:
self.assertTrue(
any(ref_file in name for name in names), f"{platform}: {ref_file} not found in package"
any(ref_file in name for name in names),
f"{platform}: {ref_file} not found in package",
)
else:
with zipfile.ZipFile(package_path, "r") as zf:
@@ -338,7 +350,8 @@ Pass data to components:
)
else:
self.assertTrue(
any(ref_file in name for name in names), f"{platform}: {ref_file} not found in package"
any(ref_file in name for name in names),
f"{platform}: {ref_file} not found in package",
)
def test_e2e_metadata_consistency(self):
@@ -357,7 +370,9 @@ Pass data to components:
metadata = json.loads(metadata_file.read().decode("utf-8"))
else:
with zipfile.ZipFile(package_path, "r") as zf:
metadata_filename = f"{platform}_metadata.json" if platform == "openai" else "metadata.json"
metadata_filename = (
f"{platform}_metadata.json" if platform == "openai" else "metadata.json"
)
metadata_content = zf.read(metadata_filename).decode("utf-8")
metadata = json.loads(metadata_content)
@@ -467,7 +482,9 @@ class TestAdaptorsWorkflowIntegration(unittest.TestCase):
# Should respect custom path
self.assertTrue(package_path.exists())
self.assertTrue("my-package" in package_path.name or package_path.parent.name == "custom")
self.assertTrue(
"my-package" in package_path.name or package_path.parent.name == "custom"
)
def test_workflow_api_key_validation(self):
"""Test API key validation for each platform"""
@@ -485,7 +502,9 @@ class TestAdaptorsWorkflowIntegration(unittest.TestCase):
for platform, api_key, expected in test_cases:
adaptor = get_adaptor(platform)
result = adaptor.validate_api_key(api_key)
self.assertEqual(result, expected, f"{platform}: validate_api_key('{api_key}') should be {expected}")
self.assertEqual(
result, expected, f"{platform}: validate_api_key('{api_key}') should be {expected}"
)
class TestAdaptorsErrorHandling(unittest.TestCase):

View File

@@ -58,7 +58,9 @@ class TestClaudeAdaptor(unittest.TestCase):
(skill_dir / "references").mkdir()
(skill_dir / "references" / "test.md").write_text("# Test content")
metadata = SkillMetadata(name="test-skill", description="Test skill description", version="1.0.0")
metadata = SkillMetadata(
name="test-skill", description="Test skill description", version="1.0.0"
)
formatted = self.adaptor.format_skill_md(skill_dir, metadata)
@@ -221,7 +223,9 @@ This is existing skill content that should be preserved.
self.assertTrue(package_path.exists())
# Should respect custom naming if provided
self.assertTrue("my-package" in package_path.name or package_path.parent.name == "custom")
self.assertTrue(
"my-package" in package_path.name or package_path.parent.name == "custom"
)
def test_package_to_directory(self):
"""Test packaging to directory (should auto-name)"""

View File

@@ -95,7 +95,9 @@ class TestAPIReferenceBuilder(unittest.TestCase):
"functions": [
{
"name": "calculate_sum",
"parameters": [{"name": "numbers", "type_hint": "list", "default": None}],
"parameters": [
{"name": "numbers", "type_hint": "list", "default": None}
],
"return_type": "int",
"docstring": "Calculate sum of numbers.",
"is_async": False,
@@ -166,7 +168,14 @@ class TestAPIReferenceBuilder(unittest.TestCase):
{
"file": "module.py",
"language": "Python",
"classes": [{"name": "TestClass", "docstring": "Test class.", "base_classes": [], "methods": []}],
"classes": [
{
"name": "TestClass",
"docstring": "Test class.",
"base_classes": [],
"methods": [],
}
],
"functions": [
{
"name": "test_func",

View File

@@ -192,9 +192,15 @@ How to use async tools.
with (
patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
patch.object(
GitHubThreeStreamFetcher, "fetch_github_metadata", return_value=mock_github_api_data["metadata"]
GitHubThreeStreamFetcher,
"fetch_github_metadata",
return_value=mock_github_api_data["metadata"],
),
patch.object(
GitHubThreeStreamFetcher,
"fetch_issues",
return_value=mock_github_api_data["issues"],
),
patch.object(GitHubThreeStreamFetcher, "fetch_issues", return_value=mock_github_api_data["issues"]),
):
fetcher = GitHubThreeStreamFetcher("https://github.com/jlowin/fastmcp")
three_streams = fetcher.fetch()
@@ -227,10 +233,18 @@ How to use async tools.
with (
patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
patch.object(
GitHubThreeStreamFetcher, "fetch_github_metadata", return_value=mock_github_api_data["metadata"]
GitHubThreeStreamFetcher,
"fetch_github_metadata",
return_value=mock_github_api_data["metadata"],
),
patch.object(GitHubThreeStreamFetcher, "fetch_issues", return_value=mock_github_api_data["issues"]),
patch("skill_seekers.cli.unified_codebase_analyzer.UnifiedCodebaseAnalyzer.c3x_analysis") as mock_c3x,
patch.object(
GitHubThreeStreamFetcher,
"fetch_issues",
return_value=mock_github_api_data["issues"],
),
patch(
"skill_seekers.cli.unified_codebase_analyzer.UnifiedCodebaseAnalyzer.c3x_analysis"
) as mock_c3x,
):
# Mock C3.x analysis to return sample data
mock_c3x.return_value = {
@@ -247,7 +261,9 @@ How to use async tools.
"c3_2_examples_count": 2,
"c3_3_guides": [{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}],
"c3_4_configs": [],
"c3_7_architecture": [{"pattern": "Service Layer", "description": "OAuth provider abstraction"}],
"c3_7_architecture": [
{"pattern": "Service Layer", "description": "OAuth provider abstraction"}
],
}
analyzer = UnifiedCodebaseAnalyzer()
@@ -316,7 +332,13 @@ How to use async tools.
"description": "Python framework for MCP servers",
},
common_problems=[
{"number": 42, "title": "OAuth setup fails", "labels": ["oauth"], "comments": 15, "state": "open"},
{
"number": 42,
"title": "OAuth setup fails",
"labels": ["oauth"],
"comments": 15,
"state": "open",
},
{
"number": 38,
"title": "Async tools not working",
@@ -344,7 +366,9 @@ How to use async tools.
# Generate router
generator = RouterGenerator(
config_paths=[str(config1), str(config2)], router_name="fastmcp", github_streams=mock_streams
config_paths=[str(config1), str(config2)],
router_name="fastmcp",
github_streams=mock_streams,
)
skill_md = generator.generate_skill_md()
@@ -536,15 +560,21 @@ class TestScenario2MultiSource:
source1_data = {"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]}
# Mock source 2 (GitHub C3.x)
source2_data = {"api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}]}
source2_data = {
"api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}]
}
# Mock GitHub streams
github_streams = ThreeStreamData(
code_stream=CodeStream(directory=Path("/tmp"), files=[]),
docs_stream=DocsStream(readme="Use client_id and client_secret", contributing=None, docs_files=[]),
docs_stream=DocsStream(
readme="Use client_id and client_secret", contributing=None, docs_files=[]
),
insights_stream=InsightsStream(
metadata={"stars": 1000},
common_problems=[{"number": 42, "title": "OAuth parameter confusion", "labels": ["oauth"]}],
common_problems=[
{"number": 42, "title": "OAuth parameter confusion", "labels": ["oauth"]}
],
known_solutions=[],
top_labels=[],
),
@@ -633,7 +663,9 @@ def test_connection():
"""Test basic analysis of local codebase."""
analyzer = UnifiedCodebaseAnalyzer()
result = analyzer.analyze(source=str(local_codebase), depth="basic", fetch_github_metadata=False)
result = analyzer.analyze(
source=str(local_codebase), depth="basic", fetch_github_metadata=False
)
# Verify result
assert isinstance(result, AnalysisResult)
@@ -653,7 +685,9 @@ def test_connection():
"""Test C3.x analysis of local codebase."""
analyzer = UnifiedCodebaseAnalyzer()
with patch("skill_seekers.cli.unified_codebase_analyzer.UnifiedCodebaseAnalyzer.c3x_analysis") as mock_c3x:
with patch(
"skill_seekers.cli.unified_codebase_analyzer.UnifiedCodebaseAnalyzer.c3x_analysis"
) as mock_c3x:
# Mock C3.x to return sample data
mock_c3x.return_value = {
"files": ["database.py", "api.py"],
@@ -666,7 +700,9 @@ def test_connection():
"c3_7_architecture": [],
}
result = analyzer.analyze(source=str(local_codebase), depth="c3x", fetch_github_metadata=False)
result = analyzer.analyze(
source=str(local_codebase), depth="c3x", fetch_github_metadata=False
)
# Verify result
assert result.source_type == "local"
@@ -814,7 +850,12 @@ Based on analysis of GitHub issues:
github_overhead += 1
continue
if in_repo_info:
if line.startswith("**") or "github.com" in line or "" in line or "FastMCP is" in line:
if (
line.startswith("**")
or "github.com" in line
or "" in line
or "FastMCP is" in line
):
github_overhead += 1
if line.startswith("##"):
in_repo_info = False
@@ -894,7 +935,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
# Check minimum 3 code examples
code_blocks = sub_skill_md.count("```")
assert code_blocks >= 6, f"Need at least 3 code examples (6 markers), found {code_blocks // 2}"
assert code_blocks >= 6, (
f"Need at least 3 code examples (6 markers), found {code_blocks // 2}"
)
# Check language tags
assert "```python" in sub_skill_md, "Code blocks must have language tags"
@@ -909,7 +952,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
# Check solution indicators for closed issues
if "closed" in sub_skill_md.lower():
assert "" in sub_skill_md or "Solution" in sub_skill_md, "Closed issues should indicate solution found"
assert "" in sub_skill_md or "Solution" in sub_skill_md, (
"Closed issues should indicate solution found"
)
class TestTokenEfficiencyCalculation:
@@ -946,7 +991,9 @@ class TestTokenEfficiencyCalculation:
# With selective loading and caching, achieve 35-40%
# Even conservative estimate shows 29.5%, actual usage patterns show 35-40%
assert reduction_percent >= 29, f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)"
assert reduction_percent >= 29, (
f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)"
)
if __name__ == "__main__":

View File

@@ -92,7 +92,11 @@ class TestAsyncScrapeMethods(unittest.TestCase):
def test_scrape_page_async_exists(self):
"""Test scrape_page_async method exists"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": {"main_content": "article"}}
config = {
"name": "test",
"base_url": "https://example.com/",
"selectors": {"main_content": "article"},
}
with tempfile.TemporaryDirectory() as tmpdir:
try:
@@ -105,7 +109,11 @@ class TestAsyncScrapeMethods(unittest.TestCase):
def test_scrape_all_async_exists(self):
"""Test scrape_all_async method exists"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": {"main_content": "article"}}
config = {
"name": "test",
"base_url": "https://example.com/",
"selectors": {"main_content": "article"},
}
with tempfile.TemporaryDirectory() as tmpdir:
try:
@@ -144,7 +152,9 @@ class TestAsyncRouting(unittest.TestCase):
converter = DocToSkillConverter(config, dry_run=True)
# Mock scrape_all_async to verify it gets called
with patch.object(converter, "scrape_all_async", new_callable=AsyncMock) as mock_async:
with patch.object(
converter, "scrape_all_async", new_callable=AsyncMock
) as mock_async:
converter.scrape_all()
# Verify async version was called
mock_async.assert_called_once()
@@ -167,7 +177,9 @@ class TestAsyncRouting(unittest.TestCase):
converter = DocToSkillConverter(config, dry_run=True)
# Mock scrape_all_async to verify it does NOT get called
with patch.object(converter, "scrape_all_async", new_callable=AsyncMock) as mock_async:
with patch.object(
converter, "scrape_all_async", new_callable=AsyncMock
) as mock_async:
with patch.object(converter, "_try_llms_txt", return_value=False):
converter.scrape_all()
# Verify async version was NOT called
@@ -249,7 +261,9 @@ class TestAsyncErrorHandling(unittest.TestCase):
# Mock client.get to raise exception
with patch.object(client, "get", side_effect=httpx.HTTPError("Test error")):
# Should not raise exception, just log error
await converter.scrape_page_async("https://example.com/test", semaphore, client)
await converter.scrape_page_async(
"https://example.com/test", semaphore, client
)
# Run async test
asyncio.run(run_test())

View File

@@ -38,18 +38,16 @@ def project_root():
@pytest.fixture
def run_bootstrap(project_root):
"""Execute bootstrap script and return result"""
def _run(timeout=600):
script = project_root / "scripts" / "bootstrap_skill.sh"
result = subprocess.run(
["bash", str(script)],
cwd=project_root,
capture_output=True,
text=True,
timeout=timeout
["bash", str(script)], cwd=project_root, capture_output=True, text=True, timeout=timeout
)
return result
return _run
@@ -95,7 +93,7 @@ class TestBootstrapSkillE2E:
assert content.startswith("---"), "Missing frontmatter start"
# Find closing delimiter
lines = content.split('\n')
lines = content.split("\n")
closing_found = False
for i, line in enumerate(lines[1:], 1):
if line.strip() == "---":
@@ -129,11 +127,7 @@ class TestBootstrapSkillE2E:
# Create venv
venv_path = tmp_path / "test_venv"
subprocess.run(
[sys.executable, "-m", "venv", str(venv_path)],
check=True,
timeout=60
)
subprocess.run([sys.executable, "-m", "venv", str(venv_path)], check=True, timeout=60)
# Install skill in venv
pip_path = venv_path / "bin" / "pip"
@@ -142,7 +136,7 @@ class TestBootstrapSkillE2E:
cwd=output_skill_dir.parent.parent,
capture_output=True,
text=True,
timeout=120
timeout=120,
)
# Should install successfully
@@ -156,13 +150,13 @@ class TestBootstrapSkillE2E:
# Try to package with claude adaptor (simplest)
from skill_seekers.cli.adaptors import get_adaptor
adaptor = get_adaptor('claude')
adaptor = get_adaptor("claude")
# Should be able to package without errors
try:
package_path = adaptor.package(
skill_dir=output_skill_dir, # Path object, not str
output_path=tmp_path # Path object, not str
output_path=tmp_path, # Path object, not str
)
assert Path(package_path).exists(), "Package not created"

View File

@@ -111,7 +111,10 @@ class TestC3Integration:
}
],
"ai_enhancements": {
"overall_insights": {"security_issues_found": 1, "recommended_actions": ["Move secrets to .env"]}
"overall_insights": {
"security_issues_found": 1,
"recommended_actions": ["Move secrets to .env"],
}
},
},
"architecture": {
@@ -120,7 +123,11 @@ class TestC3Integration:
"pattern_name": "MVC",
"confidence": 0.89,
"framework": "Flask",
"evidence": ["models/ directory", "views/ directory", "controllers/ directory"],
"evidence": [
"models/ directory",
"views/ directory",
"controllers/ directory",
],
}
],
"frameworks_detected": ["Flask", "SQLAlchemy"],
@@ -173,7 +180,9 @@ class TestC3Integration:
"""Test ARCHITECTURE.md is generated with all 8 sections."""
# Create skill builder with C3.x data (multi-source list format)
github_data = {"readme": "Test README", "c3_analysis": mock_c3_data}
scraped_data = {"github": [{"repo": "test/repo", "repo_id": "test_repo", "idx": 0, "data": github_data}]}
scraped_data = {
"github": [{"repo": "test/repo", "repo_id": "test_repo", "idx": 0, "data": github_data}]
}
builder = UnifiedSkillBuilder(mock_config, scraped_data)
builder.skill_dir = temp_dir
@@ -212,7 +221,9 @@ class TestC3Integration:
"""Test correct C3.x reference directory structure is created."""
# Create skill builder with C3.x data (multi-source list format)
github_data = {"readme": "Test README", "c3_analysis": mock_c3_data}
scraped_data = {"github": [{"repo": "test/repo", "repo_id": "test_repo", "idx": 0, "data": github_data}]}
scraped_data = {
"github": [{"repo": "test/repo", "repo_id": "test_repo", "idx": 0, "data": github_data}]
}
builder = UnifiedSkillBuilder(mock_config, scraped_data)
builder.skill_dir = temp_dir
@@ -261,7 +272,11 @@ class TestC3Integration:
# Mock GitHubScraper (correct module path for import)
with patch("skill_seekers.cli.github_scraper.GitHubScraper") as mock_github:
mock_github.return_value.scrape.return_value = {"readme": "Test README", "issues": [], "releases": []}
mock_github.return_value.scrape.return_value = {
"readme": "Test README",
"issues": [],
"releases": [],
}
scraper = UnifiedScraper(config_path)
@@ -278,7 +293,14 @@ class TestC3Integration:
config = {
"name": "test",
"description": "Test",
"sources": [{"type": "github", "repo": "test/repo", "enable_codebase_analysis": True, "ai_mode": "auto"}],
"sources": [
{
"type": "github",
"repo": "test/repo",
"enable_codebase_analysis": True,
"ai_mode": "auto",
}
],
}
# Save config

View File

@@ -19,7 +19,9 @@ class TestModernCLICommands(unittest.TestCase):
def test_doc_scraper_uses_modern_commands(self):
"""Test doc_scraper.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "doc_scraper.py"
script_path = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "doc_scraper.py"
)
with open(script_path) as f:
content = f.read()
@@ -32,7 +34,13 @@ class TestModernCLICommands(unittest.TestCase):
def test_enhance_skill_local_uses_modern_commands(self):
"""Test enhance_skill_local.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "enhance_skill_local.py"
script_path = (
Path(__file__).parent.parent
/ "src"
/ "skill_seekers"
/ "cli"
/ "enhance_skill_local.py"
)
with open(script_path) as f:
content = f.read()
@@ -45,7 +53,9 @@ class TestModernCLICommands(unittest.TestCase):
def test_estimate_pages_uses_modern_commands(self):
"""Test estimate_pages.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "estimate_pages.py"
script_path = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "estimate_pages.py"
)
with open(script_path) as f:
content = f.read()
@@ -58,7 +68,9 @@ class TestModernCLICommands(unittest.TestCase):
def test_package_skill_uses_modern_commands(self):
"""Test package_skill.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "package_skill.py"
script_path = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "package_skill.py"
)
with open(script_path) as f:
content = f.read()
@@ -71,7 +83,9 @@ class TestModernCLICommands(unittest.TestCase):
def test_github_scraper_uses_modern_commands(self):
"""Test github_scraper.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "github_scraper.py"
script_path = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "github_scraper.py"
)
with open(script_path) as f:
content = f.read()
@@ -89,10 +103,16 @@ class TestUnifiedCLIEntryPoints(unittest.TestCase):
def test_main_cli_help_output(self):
"""Test skill-seekers --help works"""
try:
result = subprocess.run(["skill-seekers", "--help"], capture_output=True, text=True, timeout=5)
result = subprocess.run(
["skill-seekers", "--help"], capture_output=True, text=True, timeout=5
)
# Should return successfully
self.assertIn(result.returncode, [0, 2], f"skill-seekers --help failed with code {result.returncode}")
self.assertIn(
result.returncode,
[0, 2],
f"skill-seekers --help failed with code {result.returncode}",
)
# Should show subcommands
output = result.stdout + result.stderr
@@ -107,14 +127,18 @@ class TestUnifiedCLIEntryPoints(unittest.TestCase):
def test_main_cli_version_output(self):
"""Test skill-seekers --version works"""
try:
result = subprocess.run(["skill-seekers", "--version"], capture_output=True, text=True, timeout=5)
result = subprocess.run(
["skill-seekers", "--version"], capture_output=True, text=True, timeout=5
)
# Should return successfully
self.assertEqual(result.returncode, 0, f"skill-seekers --version failed: {result.stderr}")
self.assertEqual(
result.returncode, 0, f"skill-seekers --version failed: {result.stderr}"
)
# Should show version
output = result.stdout + result.stderr
self.assertIn('2.7.0', output)
self.assertIn("2.7.0", output)
except FileNotFoundError:
# If skill-seekers is not installed, skip this test
@@ -140,7 +164,9 @@ class TestNoHardcodedPaths(unittest.TestCase):
for hardcoded_path in hardcoded_paths:
self.assertNotIn(
hardcoded_path, content, f"{script_path.name} contains hardcoded path: {hardcoded_path}"
hardcoded_path,
content,
f"{script_path.name} contains hardcoded path: {hardcoded_path}",
)

View File

@@ -173,7 +173,10 @@ API_KEY=secret123
PORT=8000
"""
config_file = ConfigFile(
file_path=str(Path(self.temp_dir) / ".env"), relative_path=".env", config_type="env", purpose="unknown"
file_path=str(Path(self.temp_dir) / ".env"),
relative_path=".env",
config_type="env",
purpose="unknown",
)
file_path = Path(self.temp_dir) / ".env"
@@ -313,7 +316,8 @@ endpoint = "https://api.example.com"
# Check if parsing failed due to missing toml/tomli
if config_file.parse_errors and (
"toml" in str(config_file.parse_errors).lower() and "not installed" in str(config_file.parse_errors)
"toml" in str(config_file.parse_errors).lower()
and "not installed" in str(config_file.parse_errors)
):
self.skipTest("toml/tomli not installed")
@@ -337,7 +341,11 @@ class TestConfigPatternDetector(unittest.TestCase):
]
config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings
file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
)
patterns = self.detector.detect_patterns(config_file)
@@ -353,7 +361,11 @@ class TestConfigPatternDetector(unittest.TestCase):
]
config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings
file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
)
patterns = self.detector.detect_patterns(config_file)
@@ -369,7 +381,11 @@ class TestConfigPatternDetector(unittest.TestCase):
]
config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings
file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
)
patterns = self.detector.detect_patterns(config_file)
@@ -385,7 +401,11 @@ class TestConfigPatternDetector(unittest.TestCase):
]
config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings
file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
)
patterns = self.detector.detect_patterns(config_file)
@@ -402,7 +422,11 @@ class TestConfigPatternDetector(unittest.TestCase):
]
config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings
file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
)
patterns = self.detector.detect_patterns(config_file)
@@ -418,7 +442,11 @@ class TestConfigPatternDetector(unittest.TestCase):
]
config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings
file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
)
patterns = self.detector.detect_patterns(config_file)
@@ -434,7 +462,11 @@ class TestConfigPatternDetector(unittest.TestCase):
]
config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings
file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
)
patterns = self.detector.detect_patterns(config_file)

View File

@@ -30,7 +30,11 @@ class TestConfigValidation(unittest.TestCase):
"name": "godot",
"base_url": "https://docs.godotengine.org/en/stable/",
"description": "Godot Engine documentation",
"selectors": {"main_content": 'div[role="main"]', "title": "title", "code_blocks": "pre code"},
"selectors": {
"main_content": 'div[role="main"]',
"title": "title",
"code_blocks": "pre code",
},
"url_patterns": {"include": ["/guide/", "/api/"], "exclude": ["/blog/"]},
"categories": {"getting_started": ["intro", "tutorial"], "api": ["api", "reference"]},
"rate_limit": 0.5,
@@ -84,7 +88,9 @@ class TestConfigValidation(unittest.TestCase):
"""Test invalid selectors (not a dictionary)"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": "invalid"}
errors, _ = validate_config(config)
self.assertTrue(any("selectors" in error.lower() and "dictionary" in error.lower() for error in errors))
self.assertTrue(
any("selectors" in error.lower() and "dictionary" in error.lower() for error in errors)
)
def test_missing_recommended_selectors(self):
"""Test warning for missing recommended selectors"""
@@ -104,25 +110,44 @@ class TestConfigValidation(unittest.TestCase):
"""Test invalid url_patterns (not a dictionary)"""
config = {"name": "test", "base_url": "https://example.com/", "url_patterns": []}
errors, _ = validate_config(config)
self.assertTrue(any("url_patterns" in error.lower() and "dictionary" in error.lower() for error in errors))
self.assertTrue(
any(
"url_patterns" in error.lower() and "dictionary" in error.lower()
for error in errors
)
)
def test_invalid_url_patterns_include_not_list(self):
"""Test invalid url_patterns.include (not a list)"""
config = {"name": "test", "base_url": "https://example.com/", "url_patterns": {"include": "not-a-list"}}
config = {
"name": "test",
"base_url": "https://example.com/",
"url_patterns": {"include": "not-a-list"},
}
errors, _ = validate_config(config)
self.assertTrue(any("include" in error.lower() and "list" in error.lower() for error in errors))
self.assertTrue(
any("include" in error.lower() and "list" in error.lower() for error in errors)
)
def test_invalid_categories_not_dict(self):
"""Test invalid categories (not a dictionary)"""
config = {"name": "test", "base_url": "https://example.com/", "categories": []}
errors, _ = validate_config(config)
self.assertTrue(any("categories" in error.lower() and "dictionary" in error.lower() for error in errors))
self.assertTrue(
any("categories" in error.lower() and "dictionary" in error.lower() for error in errors)
)
def test_invalid_category_keywords_not_list(self):
"""Test invalid category keywords (not a list)"""
config = {"name": "test", "base_url": "https://example.com/", "categories": {"getting_started": "not-a-list"}}
config = {
"name": "test",
"base_url": "https://example.com/",
"categories": {"getting_started": "not-a-list"},
}
errors, _ = validate_config(config)
self.assertTrue(any("getting_started" in error.lower() and "list" in error.lower() for error in errors))
self.assertTrue(
any("getting_started" in error.lower() and "list" in error.lower() for error in errors)
)
def test_invalid_rate_limit_negative(self):
"""Test invalid rate_limit (negative)"""
@@ -178,13 +203,23 @@ class TestConfigValidation(unittest.TestCase):
def test_invalid_start_urls_not_list(self):
"""Test invalid start_urls (not a list)"""
config = {"name": "test", "base_url": "https://example.com/", "start_urls": "https://example.com/page1"}
config = {
"name": "test",
"base_url": "https://example.com/",
"start_urls": "https://example.com/page1",
}
errors, _ = validate_config(config)
self.assertTrue(any("start_urls" in error.lower() and "list" in error.lower() for error in errors))
self.assertTrue(
any("start_urls" in error.lower() and "list" in error.lower() for error in errors)
)
def test_invalid_start_urls_bad_protocol(self):
"""Test invalid start_urls (bad protocol)"""
config = {"name": "test", "base_url": "https://example.com/", "start_urls": ["ftp://example.com/page1"]}
config = {
"name": "test",
"base_url": "https://example.com/",
"start_urls": ["ftp://example.com/page1"],
}
errors, _ = validate_config(config)
self.assertTrue(any("start_url" in error.lower() for error in errors))
@@ -193,7 +228,11 @@ class TestConfigValidation(unittest.TestCase):
config = {
"name": "test",
"base_url": "https://example.com/",
"start_urls": ["https://example.com/page1", "http://example.com/page2", "https://example.com/api/docs"],
"start_urls": [
"https://example.com/page1",
"http://example.com/page2",
"https://example.com/api/docs",
],
}
errors, _ = validate_config(config)
url_errors = [e for e in errors if "start_url" in e.lower()]

View File

@@ -153,7 +153,9 @@ class TestConstantsExports(unittest.TestCase):
self.assertTrue(hasattr(constants, "__all__"))
for name in constants.__all__:
self.assertTrue(hasattr(constants, name), f"Constant '{name}' in __all__ but not defined")
self.assertTrue(
hasattr(constants, name), f"Constant '{name}' in __all__ but not defined"
)
def test_all_exports_count(self):
"""Test that __all__ has expected number of exports."""

View File

@@ -54,7 +54,9 @@ function greet(name) {
""")
# Create mock three-stream data
code_stream = CodeStream(directory=tmp_path, files=[tmp_path / "main.py", tmp_path / "utils.js"])
code_stream = CodeStream(
directory=tmp_path, files=[tmp_path / "main.py", tmp_path / "utils.js"]
)
docs_stream = DocsStream(
readme="""# Test Project
@@ -74,10 +76,17 @@ hello()
```
""",
contributing="# Contributing\n\nPull requests welcome!",
docs_files=[{"path": "docs/guide.md", "content": "# User Guide\n\nHow to use this project."}],
docs_files=[
{"path": "docs/guide.md", "content": "# User Guide\n\nHow to use this project."}
],
)
insights_stream = InsightsStream(
metadata={"stars": 1234, "forks": 56, "language": "Python", "description": "A test project"},
metadata={
"stars": 1234,
"forks": 56,
"language": "Python",
"description": "A test project",
},
common_problems=[
{
"title": "Installation fails on Windows",
@@ -95,7 +104,13 @@ hello()
},
],
known_solutions=[
{"title": "Fixed: Module not found", "number": 35, "state": "closed", "comments": 8, "labels": ["bug"]}
{
"title": "Fixed: Module not found",
"number": 35,
"state": "closed",
"comments": 8,
"labels": ["bug"],
}
],
top_labels=[
{"label": "bug", "count": 25},
@@ -108,7 +123,9 @@ hello()
# Step 2: Run unified analyzer with basic depth
analyzer = UnifiedCodebaseAnalyzer()
result = analyzer.analyze(source="https://github.com/test/project", depth="basic", fetch_github_metadata=True)
result = analyzer.analyze(
source="https://github.com/test/project", depth="basic", fetch_github_metadata=True
)
# Step 3: Validate all three streams present
assert result.source_type == "github"
@@ -151,7 +168,13 @@ hello()
"comments": 15,
"labels": ["oauth", "token"],
},
{"title": "Async deadlock", "number": 40, "state": "open", "comments": 12, "labels": ["async", "bug"]},
{
"title": "Async deadlock",
"number": 40,
"state": "open",
"comments": 12,
"labels": ["async", "bug"],
},
{
"title": "Database connection lost",
"number": 35,
@@ -162,8 +185,20 @@ hello()
]
solutions = [
{"title": "Fixed OAuth flow", "number": 30, "state": "closed", "comments": 8, "labels": ["oauth"]},
{"title": "Resolved async race", "number": 25, "state": "closed", "comments": 6, "labels": ["async"]},
{
"title": "Fixed OAuth flow",
"number": 30,
"state": "closed",
"comments": 8,
"labels": ["oauth"],
},
{
"title": "Resolved async race",
"number": 25,
"state": "closed",
"comments": 6,
"labels": ["async"],
},
]
topics = ["oauth", "auth", "authentication"]
@@ -174,7 +209,9 @@ hello()
# Validate categorization
assert "oauth" in categorized or "auth" in categorized or "authentication" in categorized
oauth_issues = (
categorized.get("oauth", []) + categorized.get("auth", []) + categorized.get("authentication", [])
categorized.get("oauth", [])
+ categorized.get("auth", [])
+ categorized.get("authentication", [])
)
# Should have 3 OAuth-related issues (2 problems + 1 solution)
@@ -245,7 +282,12 @@ testproject.run()
docs_files=[],
)
insights_stream = InsightsStream(
metadata={"stars": 5000, "forks": 250, "language": "Python", "description": "Fast test framework"},
metadata={
"stars": 5000,
"forks": 250,
"language": "Python",
"description": "Fast test framework",
},
common_problems=[
{
"title": "OAuth setup fails",
@@ -254,8 +296,20 @@ testproject.run()
"comments": 30,
"labels": ["bug", "oauth"],
},
{"title": "Async deadlock", "number": 142, "state": "open", "comments": 25, "labels": ["async", "bug"]},
{"title": "Token refresh issue", "number": 130, "state": "open", "comments": 20, "labels": ["oauth"]},
{
"title": "Async deadlock",
"number": 142,
"state": "open",
"comments": 25,
"labels": ["async", "bug"],
},
{
"title": "Token refresh issue",
"number": 130,
"state": "open",
"comments": 20,
"labels": ["oauth"],
},
],
known_solutions=[
{
@@ -265,7 +319,13 @@ testproject.run()
"comments": 15,
"labels": ["oauth"],
},
{"title": "Resolved async race", "number": 110, "state": "closed", "comments": 12, "labels": ["async"]},
{
"title": "Resolved async race",
"number": 110,
"state": "closed",
"comments": 12,
"labels": ["async"],
},
],
top_labels=[
{"label": "oauth", "count": 45},
@@ -276,7 +336,9 @@ testproject.run()
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Generate router
generator = RouterGenerator([str(config_path1), str(config_path2)], github_streams=github_streams)
generator = RouterGenerator(
[str(config_path1), str(config_path2)], github_streams=github_streams
)
# Step 1: Validate GitHub metadata extracted
assert generator.github_metadata is not None
@@ -308,8 +370,14 @@ testproject.run()
# Validate examples section with converted questions (Fix 1)
assert "## Examples" in skill_md
# Issues converted to natural questions
assert "how do i fix oauth setup" in skill_md.lower() or "how do i handle oauth setup" in skill_md.lower()
assert "how do i handle async deadlock" in skill_md.lower() or "how do i fix async deadlock" in skill_md.lower()
assert (
"how do i fix oauth setup" in skill_md.lower()
or "how do i handle oauth setup" in skill_md.lower()
)
assert (
"how do i handle async deadlock" in skill_md.lower()
or "how do i fix async deadlock" in skill_md.lower()
)
# Common Issues section may still exist with other issues
# Note: Issue numbers may appear in Common Issues or Common Patterns sections
@@ -356,12 +424,26 @@ class TestE2EQualityMetrics:
# Create GitHub streams with realistic data
code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme="# Test\n\nA short README.", contributing=None, docs_files=[])
docs_stream = DocsStream(
readme="# Test\n\nA short README.", contributing=None, docs_files=[]
)
insights_stream = InsightsStream(
metadata={"stars": 100, "forks": 10, "language": "Python", "description": "Test"},
common_problems=[
{"title": "Issue 1", "number": 1, "state": "open", "comments": 5, "labels": ["bug"]},
{"title": "Issue 2", "number": 2, "state": "open", "comments": 3, "labels": ["bug"]},
{
"title": "Issue 1",
"number": 1,
"state": "open",
"comments": 5,
"labels": ["bug"],
},
{
"title": "Issue 2",
"number": 2,
"state": "open",
"comments": 3,
"labels": ["bug"],
},
],
known_solutions=[],
top_labels=[{"label": "bug", "count": 10}],
@@ -382,7 +464,9 @@ class TestE2EQualityMetrics:
github_overhead = lines_with_github - lines_no_github
# Validate overhead is within acceptable range (30-50 lines)
assert 20 <= github_overhead <= 60, f"GitHub overhead is {github_overhead} lines, expected 20-60"
assert 20 <= github_overhead <= 60, (
f"GitHub overhead is {github_overhead} lines, expected 20-60"
)
def test_router_size_within_limits(self, tmp_path):
"""
@@ -457,7 +541,9 @@ class TestE2EBackwardCompatibility:
code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme=None, contributing=None, docs_files=[])
insights_stream = InsightsStream(metadata={}, common_problems=[], known_solutions=[], top_labels=[])
insights_stream = InsightsStream(
metadata={}, common_problems=[], known_solutions=[], top_labels=[]
)
three_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
mock_fetcher.fetch.return_value = three_streams
@@ -490,8 +576,12 @@ class TestE2ETokenEfficiency:
# Create GitHub streams
code_stream = CodeStream(directory=tmp_path, files=[tmp_path / "main.py"])
docs_stream = DocsStream(readme="# Test\n\nQuick start guide.", contributing=None, docs_files=[])
insights_stream = InsightsStream(metadata={"stars": 100}, common_problems=[], known_solutions=[], top_labels=[])
docs_stream = DocsStream(
readme="# Test\n\nQuick start guide.", contributing=None, docs_files=[]
)
insights_stream = InsightsStream(
metadata={"stars": 100}, common_problems=[], known_solutions=[], top_labels=[]
)
three_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Verify streams are separate (no duplication)

View File

@@ -69,7 +69,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
import subprocess
try:
result = subprocess.run(["skill-seekers", "estimate", "--help"], capture_output=True, text=True, timeout=5)
result = subprocess.run(
["skill-seekers", "estimate", "--help"], capture_output=True, text=True, timeout=5
)
# Should return successfully (0 or 2 for argparse)
self.assertIn(result.returncode, [0, 2])
@@ -83,7 +85,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
import subprocess
try:
result = subprocess.run(["skill-seekers-estimate", "--help"], capture_output=True, text=True, timeout=5)
result = subprocess.run(
["skill-seekers-estimate", "--help"], capture_output=True, text=True, timeout=5
)
# Should return successfully
self.assertIn(result.returncode, [0, 2])
@@ -96,11 +100,15 @@ class TestEstimatePagesCLI(unittest.TestCase):
try:
# Run without config argument
result = subprocess.run(["skill-seekers", "estimate"], capture_output=True, text=True, timeout=5)
result = subprocess.run(
["skill-seekers", "estimate"], capture_output=True, text=True, timeout=5
)
# Should fail (non-zero exit code) or show usage
self.assertTrue(
result.returncode != 0 or "usage" in result.stderr.lower() or "usage" in result.stdout.lower()
result.returncode != 0
or "usage" in result.stderr.lower()
or "usage" in result.stdout.lower()
)
except FileNotFoundError:
self.skipTest("skill-seekers command not installed")
@@ -111,7 +119,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
try:
# Run with --all flag
result = subprocess.run(["skill-seekers", "estimate", "--all"], capture_output=True, text=True, timeout=10)
result = subprocess.run(
["skill-seekers", "estimate", "--all"], capture_output=True, text=True, timeout=10
)
# Should succeed
self.assertEqual(result.returncode, 0)
@@ -125,7 +135,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
# Should list some known configs
# (these should exist in api/configs_repo/official/)
self.assertTrue(
"react" in output.lower() or "django" in output.lower() or "godot" in output.lower(),
"react" in output.lower()
or "django" in output.lower()
or "godot" in output.lower(),
"Expected at least one known config name in output",
)
except FileNotFoundError:
@@ -136,7 +148,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
import subprocess
try:
result = subprocess.run(["skill-seekers-estimate", "--all"], capture_output=True, text=True, timeout=10)
result = subprocess.run(
["skill-seekers-estimate", "--all"], capture_output=True, text=True, timeout=10
)
# Should succeed
self.assertEqual(result.returncode, 0)

View File

@@ -60,7 +60,10 @@ class TestExcludedDirsAdditional(unittest.TestCase):
@patch("skill_seekers.cli.github_scraper.Github")
def test_extend_with_additional_dirs(self, mock_github):
"""Test adding custom exclusions to defaults."""
config = {"repo": "owner/repo", "exclude_dirs_additional": ["proprietary", "vendor", "third_party"]}
config = {
"repo": "owner/repo",
"exclude_dirs_additional": ["proprietary", "vendor", "third_party"],
}
scraper = GitHubScraper(config)
@@ -185,7 +188,11 @@ class TestExcludedDirsEdgeCases(unittest.TestCase):
"""Test that duplicates in additional list are handled (set deduplication)."""
config = {
"repo": "owner/repo",
"exclude_dirs_additional": ["venv", "custom", "venv"], # venv is duplicate (default + listed)
"exclude_dirs_additional": [
"venv",
"custom",
"venv",
], # venv is duplicate (default + listed)
}
scraper = GitHubScraper(config)
@@ -240,7 +247,11 @@ class TestExcludedDirsWithLocalRepo(unittest.TestCase):
@patch("skill_seekers.cli.github_scraper.Github")
def test_replace_mode_with_local_repo_path(self, mock_github):
"""Test that replace mode works with local_repo_path."""
config = {"repo": "owner/repo", "local_repo_path": "/tmp/test/repo", "exclude_dirs": ["only_this"]}
config = {
"repo": "owner/repo",
"local_repo_path": "/tmp/test/repo",
"exclude_dirs": ["only_this"],
}
scraper = GitHubScraper(config)
@@ -277,7 +288,10 @@ class TestExcludedDirsLogging(unittest.TestCase):
# Should have logged WARNING message
warning_calls = [str(call) for call in mock_logger.warning.call_args_list]
self.assertTrue(
any("Using custom directory exclusions" in call and "defaults overridden" in call for call in warning_calls)
any(
"Using custom directory exclusions" in call and "defaults overridden" in call
for call in warning_calls
)
)
@patch("skill_seekers.cli.github_scraper.Github")

View File

@@ -105,9 +105,16 @@ class TestRouterGeneratorWithGitHub:
# Create GitHub streams
code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme="# Test Project\n\nA test OAuth library.", contributing=None, docs_files=[])
docs_stream = DocsStream(
readme="# Test Project\n\nA test OAuth library.", contributing=None, docs_files=[]
)
insights_stream = InsightsStream(
metadata={"stars": 1234, "forks": 56, "language": "Python", "description": "OAuth helper"},
metadata={
"stars": 1234,
"forks": 56,
"language": "Python",
"description": "OAuth helper",
},
common_problems=[
{
"title": "OAuth fails on redirect",
@@ -133,7 +140,11 @@ class TestRouterGeneratorWithGitHub:
def test_extract_keywords_with_github_labels(self, tmp_path):
"""Test keyword extraction with GitHub issue labels (2x weight)."""
config = {"name": "test-oauth", "base_url": "https://example.com", "categories": {"oauth": ["oauth", "auth"]}}
config = {
"name": "test-oauth",
"base_url": "https://example.com",
"categories": {"oauth": ["oauth", "auth"]},
}
config_path = tmp_path / "config.json"
with open(config_path, "w") as f:
@@ -178,10 +189,17 @@ class TestRouterGeneratorWithGitHub:
# Create GitHub streams
code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(
readme="# OAuth Library\n\nQuick start: Install with pip install oauth", contributing=None, docs_files=[]
readme="# OAuth Library\n\nQuick start: Install with pip install oauth",
contributing=None,
docs_files=[],
)
insights_stream = InsightsStream(
metadata={"stars": 5000, "forks": 200, "language": "Python", "description": "OAuth 2.0 library"},
metadata={
"stars": 5000,
"forks": 200,
"language": "Python",
"description": "OAuth 2.0 library",
},
common_problems=[
{
"title": "Redirect URI mismatch",
@@ -190,7 +208,13 @@ class TestRouterGeneratorWithGitHub:
"comments": 25,
"labels": ["bug", "oauth"],
},
{"title": "Token refresh fails", "number": 95, "state": "open", "comments": 18, "labels": ["oauth"]},
{
"title": "Token refresh fails",
"number": 95,
"state": "open",
"comments": 18,
"labels": ["oauth"],
},
],
known_solutions=[],
top_labels=[],
@@ -250,7 +274,11 @@ class TestSubSkillIssuesSection:
def test_generate_subskill_issues_section(self, tmp_path):
"""Test generation of issues section for sub-skills."""
config = {"name": "test-oauth", "base_url": "https://example.com", "categories": {"oauth": ["oauth"]}}
config = {
"name": "test-oauth",
"base_url": "https://example.com",
"categories": {"oauth": ["oauth"]},
}
config_path = tmp_path / "config.json"
with open(config_path, "w") as f:
@@ -269,10 +297,22 @@ class TestSubSkillIssuesSection:
"comments": 20,
"labels": ["oauth", "bug"],
},
{"title": "Token expiration issue", "number": 45, "state": "open", "comments": 15, "labels": ["oauth"]},
{
"title": "Token expiration issue",
"number": 45,
"state": "open",
"comments": 15,
"labels": ["oauth"],
},
],
known_solutions=[
{"title": "Fixed OAuth flow", "number": 40, "state": "closed", "comments": 10, "labels": ["oauth"]}
{
"title": "Fixed OAuth flow",
"number": 40,
"state": "closed",
"comments": 10,
"labels": ["oauth"],
}
],
top_labels=[],
)
@@ -293,7 +333,11 @@ class TestSubSkillIssuesSection:
def test_generate_subskill_issues_no_matches(self, tmp_path):
"""Test issues section when no issues match the topic."""
config = {"name": "test-async", "base_url": "https://example.com", "categories": {"async": ["async"]}}
config = {
"name": "test-async",
"base_url": "https://example.com",
"categories": {"async": ["async"]},
}
config_path = tmp_path / "config.json"
with open(config_path, "w") as f:
@@ -305,7 +349,13 @@ class TestSubSkillIssuesSection:
insights_stream = InsightsStream(
metadata={},
common_problems=[
{"title": "OAuth fails", "number": 1, "state": "open", "comments": 5, "labels": ["oauth"]}
{
"title": "OAuth fails",
"number": 1,
"state": "open",
"comments": 5,
"labels": ["oauth"],
}
],
known_solutions=[],
top_labels=[],
@@ -361,7 +411,12 @@ class TestIntegration:
],
)
insights_stream = InsightsStream(
metadata={"stars": 10000, "forks": 500, "language": "Python", "description": "Fast MCP server framework"},
metadata={
"stars": 10000,
"forks": 500,
"language": "Python",
"description": "Fast MCP server framework",
},
common_problems=[
{
"title": "OAuth setup fails",
@@ -370,8 +425,20 @@ class TestIntegration:
"comments": 30,
"labels": ["bug", "oauth"],
},
{"title": "Async deadlock", "number": 142, "state": "open", "comments": 25, "labels": ["async", "bug"]},
{"title": "Token refresh issue", "number": 130, "state": "open", "comments": 20, "labels": ["oauth"]},
{
"title": "Async deadlock",
"number": 142,
"state": "open",
"comments": 25,
"labels": ["async", "bug"],
},
{
"title": "Token refresh issue",
"number": 130,
"state": "open",
"comments": 20,
"labels": ["oauth"],
},
],
known_solutions=[
{
@@ -381,7 +448,13 @@ class TestIntegration:
"comments": 15,
"labels": ["oauth"],
},
{"title": "Resolved async race", "number": 110, "state": "closed", "comments": 12, "labels": ["async"]},
{
"title": "Resolved async race",
"number": 110,
"state": "closed",
"comments": 12,
"labels": ["async"],
},
],
top_labels=[
{"label": "oauth", "count": 45},
@@ -392,7 +465,9 @@ class TestIntegration:
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Create router generator
generator = RouterGenerator([str(config_path1), str(config_path2)], github_streams=github_streams)
generator = RouterGenerator(
[str(config_path1), str(config_path2)], github_streams=github_streams
)
# Generate SKILL.md
skill_md = generator.generate_skill_md()
@@ -414,8 +489,14 @@ class TestIntegration:
# 4. Examples section with converted questions (Fix 1)
assert "## Examples" in skill_md
# Issues converted to natural questions
assert "how do i fix oauth setup" in skill_md.lower() or "how do i handle oauth setup" in skill_md.lower()
assert "how do i handle async deadlock" in skill_md.lower() or "how do i fix async deadlock" in skill_md.lower()
assert (
"how do i fix oauth setup" in skill_md.lower()
or "how do i handle oauth setup" in skill_md.lower()
)
assert (
"how do i handle async deadlock" in skill_md.lower()
or "how do i fix async deadlock" in skill_md.lower()
)
# Common Issues section may still exist with other issues
# Note: Issue numbers may appear in Common Issues or Common Patterns sections

View File

@@ -134,7 +134,9 @@ class TestCloneOrPull:
"""Test cloning a new repository."""
mock_clone.return_value = MagicMock()
result = git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git")
result = git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git"
)
assert result == git_repo.cache_dir / "test-source"
mock_clone.assert_called_once()
@@ -159,7 +161,9 @@ class TestCloneOrPull:
mock_repo.remotes.origin = mock_origin
mock_repo_class.return_value = mock_repo
result = git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git")
result = git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git"
)
assert result == repo_path
mock_origin.pull.assert_called_once_with("main")
@@ -179,7 +183,9 @@ class TestCloneOrPull:
mock_repo_class.return_value = mock_repo
result = git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git", token="ghp_token123"
source_name="test-source",
git_url="https://github.com/org/repo.git",
token="ghp_token123",
)
# Verify URL was updated with token
@@ -198,7 +204,9 @@ class TestCloneOrPull:
mock_clone.return_value = MagicMock()
git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git", force_refresh=True)
git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git", force_refresh=True
)
# Verify clone was called (not pull)
mock_clone.assert_called_once()
@@ -208,7 +216,9 @@ class TestCloneOrPull:
"""Test cloning with custom branch."""
mock_clone.return_value = MagicMock()
git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git", branch="develop")
git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git", branch="develop"
)
call_kwargs = mock_clone.call_args[1]
assert call_kwargs["branch"] == "develop"
@@ -221,10 +231,14 @@ class TestCloneOrPull:
@patch("skill_seekers.mcp.git_repo.git.Repo.clone_from")
def test_clone_auth_failure_error(self, mock_clone, git_repo):
"""Test authentication failure error handling."""
mock_clone.side_effect = GitCommandError("clone", 128, stderr="fatal: Authentication failed")
mock_clone.side_effect = GitCommandError(
"clone", 128, stderr="fatal: Authentication failed"
)
with pytest.raises(GitCommandError, match="Authentication failed"):
git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git")
git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git"
)
@patch("skill_seekers.mcp.git_repo.git.Repo.clone_from")
def test_clone_not_found_error(self, mock_clone, git_repo):
@@ -232,7 +246,9 @@ class TestCloneOrPull:
mock_clone.side_effect = GitCommandError("clone", 128, stderr="fatal: repository not found")
with pytest.raises(GitCommandError, match="Repository not found"):
git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/nonexistent.git")
git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/nonexistent.git"
)
class TestFindConfigs:

View File

@@ -276,7 +276,9 @@ class TestGitSourcesE2E:
git_repo = GitConfigRepo(cache_dir=cache_dir)
# Step 1: Clone repository
repo_path = git_repo.clone_or_pull(source_name="test-pull", git_url=git_url, branch="master")
repo_path = git_repo.clone_or_pull(
source_name="test-pull", git_url=git_url, branch="master"
)
initial_configs = git_repo.find_configs(repo_path)
assert len(initial_configs) == 3
@@ -333,7 +335,9 @@ class TestGitSourcesE2E:
git_repo = GitConfigRepo(cache_dir=cache_dir)
# Step 1: Clone repository
repo_path = git_repo.clone_or_pull(source_name="test-refresh", git_url=git_url, branch="master")
repo_path = git_repo.clone_or_pull(
source_name="test-refresh", git_url=git_url, branch="master"
)
# Step 2: Modify local cache manually
corrupt_file = repo_path / "CORRUPTED.txt"
@@ -371,7 +375,9 @@ class TestGitSourcesE2E:
git_repo = GitConfigRepo(cache_dir=cache_dir)
# Step 1: Clone repository
repo_path = git_repo.clone_or_pull(source_name="test-not-found", git_url=git_url, branch="master")
repo_path = git_repo.clone_or_pull(
source_name="test-not-found", git_url=git_url, branch="master"
)
# Step 2: Try to fetch non-existent config
with pytest.raises(FileNotFoundError) as exc_info:
@@ -401,7 +407,9 @@ class TestGitSourcesE2E:
for invalid_url in invalid_urls:
with pytest.raises(ValueError, match="Invalid git URL"):
git_repo.clone_or_pull(source_name="test-invalid", git_url=invalid_url, branch="master")
git_repo.clone_or_pull(
source_name="test-invalid", git_url=invalid_url, branch="master"
)
def test_e2e_source_name_validation(self, temp_dirs):
"""
@@ -496,11 +504,15 @@ class TestGitSourcesE2E:
# Step 1: Clone to cache_dir_1
git_repo_1 = GitConfigRepo(cache_dir=cache_dir_1)
repo_path_1 = git_repo_1.clone_or_pull(source_name="test-source", git_url=git_url, branch="master")
repo_path_1 = git_repo_1.clone_or_pull(
source_name="test-source", git_url=git_url, branch="master"
)
# Step 2: Clone same repo to cache_dir_2
git_repo_2 = GitConfigRepo(cache_dir=cache_dir_2)
repo_path_2 = git_repo_2.clone_or_pull(source_name="test-source", git_url=git_url, branch="master")
repo_path_2 = git_repo_2.clone_or_pull(
source_name="test-source", git_url=git_url, branch="master"
)
# Step 3: Verify both caches are independent
assert repo_path_1 != repo_path_2
@@ -621,7 +633,9 @@ class TestGitSourcesE2E:
repo.index.commit("Increase React config max_pages to 500")
# Step 6: Developers pull updates
git_repo.clone_or_pull(source_name=source["name"], git_url=source["git_url"], branch=source["branch"])
git_repo.clone_or_pull(
source_name=source["name"], git_url=source["git_url"], branch=source["branch"]
)
updated_config = git_repo.get_config(repo_path, "react")
assert updated_config["max_pages"] == 500
@@ -631,7 +645,9 @@ class TestGitSourcesE2E:
repo.index.remove(["react.json"])
repo.index.commit("Remove react.json")
git_repo.clone_or_pull(source_name=source["name"], git_url=source["git_url"], branch=source["branch"])
git_repo.clone_or_pull(
source_name=source["name"], git_url=source["git_url"], branch=source["branch"]
)
# Step 8: Error handling works correctly
with pytest.raises(FileNotFoundError, match="react.json"):
@@ -700,7 +716,11 @@ class TestMCPToolsE2E:
"""
MCP E2E Test 1: Complete add/list/remove workflow via MCP tools
"""
from skill_seekers.mcp.server import add_config_source_tool, list_config_sources_tool, remove_config_source_tool
from skill_seekers.mcp.server import (
add_config_source_tool,
list_config_sources_tool,
remove_config_source_tool,
)
cache_dir, config_dir = temp_dirs
repo_dir, repo = temp_git_repo
@@ -708,7 +728,12 @@ class TestMCPToolsE2E:
# Add source
add_result = await add_config_source_tool(
{"name": "mcp-test-source", "git_url": git_url, "source_type": "custom", "branch": "master"}
{
"name": "mcp-test-source",
"git_url": git_url,
"source_type": "custom",
"branch": "master",
}
)
assert len(add_result) == 1
@@ -744,7 +769,12 @@ class TestMCPToolsE2E:
dest_dir.mkdir(parents=True, exist_ok=True)
result = await fetch_config_tool(
{"config_name": "test-framework", "git_url": git_url, "branch": "master", "destination": str(dest_dir)}
{
"config_name": "test-framework",
"git_url": git_url,
"branch": "master",
"destination": str(dest_dir),
}
)
assert len(result) == 1
@@ -831,10 +861,16 @@ class TestMCPToolsE2E:
assert "" in result[0].text or "not found" in result[0].text.lower()
# Test 5: Fetch non-existent config from valid source
await add_config_source_tool({"name": "valid-source", "git_url": git_url, "branch": "master"})
await add_config_source_tool(
{"name": "valid-source", "git_url": git_url, "branch": "master"}
)
result = await fetch_config_tool(
{"config_name": "non-existent-config", "source": "valid-source", "destination": str(dest_dir)}
{
"config_name": "non-existent-config",
"source": "valid-source",
"destination": str(dest_dir),
}
)
assert "" in result[0].text or "not found" in result[0].text.lower()

View File

@@ -189,7 +189,13 @@ class TestIssueAnalysis:
def test_analyze_issues_known_solutions(self):
"""Test extraction of known solutions (closed issues with comments)."""
issues = [
{"title": "Fixed OAuth", "number": 35, "state": "closed", "comments": 5, "labels": [{"name": "bug"}]},
{
"title": "Fixed OAuth",
"number": 35,
"state": "closed",
"comments": 5,
"labels": [{"name": "bug"}],
},
{
"title": "Closed without comments",
"number": 36,
@@ -239,7 +245,10 @@ class TestIssueAnalysis:
assert len(insights["common_problems"]) <= 10
# Should be sorted by comment count (descending)
if len(insights["common_problems"]) > 1:
assert insights["common_problems"][0]["comments"] >= insights["common_problems"][1]["comments"]
assert (
insights["common_problems"][0]["comments"]
>= insights["common_problems"][1]["comments"]
)
class TestGitHubAPI:
@@ -286,7 +295,13 @@ class TestGitHubAPI:
"""Test fetching issues via GitHub API."""
mock_response = Mock()
mock_response.json.return_value = [
{"title": "Bug", "number": 42, "state": "open", "comments": 10, "labels": [{"name": "bug"}]}
{
"title": "Bug",
"number": 42,
"state": "open",
"comments": 10,
"labels": [{"name": "bug"}],
}
]
mock_response.raise_for_status = Mock()
mock_get.return_value = mock_response
@@ -304,7 +319,14 @@ class TestGitHubAPI:
mock_response = Mock()
mock_response.json.return_value = [
{"title": "Issue", "number": 42, "state": "open", "comments": 5, "labels": []},
{"title": "PR", "number": 43, "state": "open", "comments": 3, "labels": [], "pull_request": {}},
{
"title": "PR",
"number": 43,
"state": "open",
"comments": 3,
"labels": [],
"pull_request": {},
},
]
mock_response.raise_for_status = Mock()
mock_get.return_value = mock_response
@@ -376,7 +398,13 @@ class TestIntegration:
else:
# Issues call
mock_response.json.return_value = [
{"title": "Test Issue", "number": 42, "state": "open", "comments": 10, "labels": [{"name": "bug"}]}
{
"title": "Test Issue",
"number": 42,
"state": "open",
"comments": 10,
"labels": [{"name": "bug"}],
}
]
return mock_response

View File

@@ -587,7 +587,9 @@ class TestGitHubToSkillConverter(unittest.TestCase):
config = {"repo": "facebook/react", "name": "test", "description": "Test skill"}
# Patch the paths to use our temp directory
with patch("skill_seekers.cli.github_scraper.GitHubToSkillConverter._load_data") as mock_load:
with patch(
"skill_seekers.cli.github_scraper.GitHubToSkillConverter._load_data"
) as mock_load:
mock_load.return_value = self.mock_data
converter = self.GitHubToSkillConverter(config)
converter.skill_dir = str(self.output_dir / "test_skill")
@@ -677,7 +679,10 @@ class TestSymlinkHandling(unittest.TestCase):
scraper.repo = Mock()
# First call returns symlink, second call raises 404
scraper.repo.get_contents.side_effect = [mock_symlink, GithubException(404, "Not found")]
scraper.repo.get_contents.side_effect = [
mock_symlink,
GithubException(404, "Not found"),
]
result = scraper._get_file_content("README.md")
@@ -729,7 +734,9 @@ class TestSymlinkHandling(unittest.TestCase):
# Should successfully extract README content
self.assertIn("readme", scraper.extracted_data)
self.assertEqual(scraper.extracted_data["readme"], "# AI SDK\n\nThe AI SDK is a TypeScript toolkit")
self.assertEqual(
scraper.extracted_data["readme"], "# AI SDK\n\nThe AI SDK is a TypeScript toolkit"
)
def test_extract_changelog_with_symlink(self):
"""Test CHANGELOG extraction with symlinked CHANGELOG.md"""
@@ -789,7 +796,9 @@ class TestSymlinkHandling(unittest.TestCase):
mock_content.type = "file"
mock_content.encoding = "none" # Large files have encoding="none"
mock_content.size = 1388271 # 1.4MB CHANGELOG
mock_content.download_url = "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
mock_content.download_url = (
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
)
with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config)
@@ -820,7 +829,9 @@ class TestSymlinkHandling(unittest.TestCase):
mock_content.type = "file"
mock_content.encoding = "none"
mock_content.size = 1388271
mock_content.download_url = "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
mock_content.download_url = (
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
)
with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config)

View File

@@ -15,7 +15,12 @@ from unittest.mock import MagicMock, Mock, patch
import pytest
from skill_seekers.cli.guide_enhancer import GuideEnhancer, PrerequisiteItem, StepEnhancement, TroubleshootingItem
from skill_seekers.cli.guide_enhancer import (
GuideEnhancer,
PrerequisiteItem,
StepEnhancement,
TroubleshootingItem,
)
class TestGuideEnhancerModeDetection:
@@ -25,7 +30,9 @@ class TestGuideEnhancerModeDetection:
"""Test auto mode detects API when key present and library available"""
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic:
with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="auto")
# Will be 'api' if library available, otherwise 'local' or 'none'
@@ -96,7 +103,9 @@ class TestGuideEnhancerStepDescriptions:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic:
with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api":
@@ -104,7 +113,12 @@ class TestGuideEnhancerStepDescriptions:
enhancer.client = Mock() # Mock the client
steps = [{"description": "scraper.scrape(url)", "code": "result = scraper.scrape(url)"}]
steps = [
{
"description": "scraper.scrape(url)",
"code": "result = scraper.scrape(url)",
}
]
result = enhancer.enhance_step_descriptions(steps)
assert len(result) == 1
@@ -129,7 +143,11 @@ class TestGuideEnhancerTroubleshooting:
def test_enhance_troubleshooting_none_mode(self):
"""Test troubleshooting in none mode"""
enhancer = GuideEnhancer(mode="none")
guide_data = {"title": "Test Guide", "steps": [{"description": "test", "code": "code"}], "language": "python"}
guide_data = {
"title": "Test Guide",
"steps": [{"description": "test", "code": "code"}],
"language": "python",
}
result = enhancer.enhance_troubleshooting(guide_data)
assert result == []
@@ -151,7 +169,9 @@ class TestGuideEnhancerTroubleshooting:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic:
with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api":
@@ -196,7 +216,11 @@ class TestGuideEnhancerPrerequisites:
mock_call.return_value = json.dumps(
{
"prerequisites_detailed": [
{"name": "requests", "why": "HTTP client for making web requests", "setup": "pip install requests"},
{
"name": "requests",
"why": "HTTP client for making web requests",
"setup": "pip install requests",
},
{
"name": "beautifulsoup4",
"why": "HTML/XML parser for web scraping",
@@ -208,7 +232,9 @@ class TestGuideEnhancerPrerequisites:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic:
with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api":
@@ -240,12 +266,20 @@ class TestGuideEnhancerNextSteps:
def test_enhance_next_steps_api_mode(self, mock_call):
"""Test next steps with API mode"""
mock_call.return_value = json.dumps(
{"next_steps": ["How to handle async workflows", "How to add error handling", "How to implement caching"]}
{
"next_steps": [
"How to handle async workflows",
"How to add error handling",
"How to implement caching",
]
}
)
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic:
with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api":
@@ -285,7 +319,9 @@ class TestGuideEnhancerUseCases:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic:
with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api":
@@ -293,7 +329,10 @@ class TestGuideEnhancerUseCases:
enhancer.client = Mock()
guide_data = {"title": "How to Scrape Docs", "description": "Documentation scraping"}
guide_data = {
"title": "How to Scrape Docs",
"description": "Documentation scraping",
}
result = enhancer.enhance_use_cases(guide_data)
assert len(result) == 2
@@ -332,7 +371,11 @@ class TestGuideEnhancerFullWorkflow:
{
"step_descriptions": [
{"step_index": 0, "explanation": "Import required libraries", "variations": []},
{"step_index": 1, "explanation": "Initialize scraper instance", "variations": []},
{
"step_index": 1,
"explanation": "Initialize scraper instance",
"variations": [],
},
],
"troubleshooting": [
{
@@ -342,7 +385,9 @@ class TestGuideEnhancerFullWorkflow:
"solution": "pip install requests",
}
],
"prerequisites_detailed": [{"name": "requests", "why": "HTTP client", "setup": "pip install requests"}],
"prerequisites_detailed": [
{"name": "requests", "why": "HTTP client", "setup": "pip install requests"}
],
"next_steps": ["How to add authentication"],
"use_cases": ["Automate documentation extraction"],
}
@@ -350,7 +395,9 @@ class TestGuideEnhancerFullWorkflow:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic:
with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api":
@@ -508,7 +555,11 @@ class TestGuideEnhancerResponseParsing:
}
)
guide_data = {"title": "Test", "steps": [{"description": "Test", "code": "test"}], "language": "python"}
guide_data = {
"title": "Test",
"steps": [{"description": "Test", "code": "test"}],
"language": "python",
}
result = enhancer._parse_enhancement_response(response, guide_data)

View File

@@ -121,7 +121,10 @@ def test_workflow():
def test_calculate_complexity(self):
"""Test complexity level calculation"""
# Simple workflow - beginner
simple_steps = [WorkflowStep(1, "x = 1", "Assign variable"), WorkflowStep(2, "print(x)", "Print variable")]
simple_steps = [
WorkflowStep(1, "x = 1", "Assign variable"),
WorkflowStep(2, "print(x)", "Print variable"),
]
simple_workflow = {"code": "x = 1\nprint(x)", "category": "workflow"}
complexity_simple = self.analyzer._calculate_complexity(simple_steps, simple_workflow)
self.assertEqual(complexity_simple, "beginner")
@@ -129,7 +132,9 @@ def test_workflow():
# Complex workflow - advanced
complex_steps = [WorkflowStep(i, f"step{i}", f"Step {i}") for i in range(1, 8)]
complex_workflow = {
"code": "\n".join([f"async def step{i}(): await complex_operation()" for i in range(7)]),
"code": "\n".join(
[f"async def step{i}(): await complex_operation()" for i in range(7)]
),
"category": "workflow",
}
complexity_complex = self.analyzer._calculate_complexity(complex_steps, complex_workflow)
@@ -466,8 +471,12 @@ class TestHowToGuideBuilder(unittest.TestCase):
def test_create_collection(self):
"""Test guide collection creation with metadata"""
guides = [
HowToGuide(guide_id="guide-1", title="Guide 1", overview="Test", complexity_level="beginner"),
HowToGuide(guide_id="guide-2", title="Guide 2", overview="Test", complexity_level="advanced"),
HowToGuide(
guide_id="guide-1", title="Guide 1", overview="Test", complexity_level="beginner"
),
HowToGuide(
guide_id="guide-2", title="Guide 2", overview="Test", complexity_level="advanced"
),
]
collection = self.builder._create_collection(guides)
@@ -492,7 +501,10 @@ class TestHowToGuideBuilder(unittest.TestCase):
# Correct attribute names
collection = GuideCollection(
total_guides=1, guides=guides, guides_by_complexity={"beginner": 1}, guides_by_use_case={}
total_guides=1,
guides=guides,
guides_by_complexity={"beginner": 1},
guides_by_use_case={},
)
output_dir = Path(self.temp_dir)
@@ -905,7 +917,10 @@ def test_file_processing():
output_dir = Path(self.temp_dir) / "guides_fallback"
# Mock GuideEnhancer to raise exception
with patch("skill_seekers.cli.guide_enhancer.GuideEnhancer", side_effect=Exception("AI unavailable")):
with patch(
"skill_seekers.cli.guide_enhancer.GuideEnhancer",
side_effect=Exception("AI unavailable"),
):
# Should NOT crash - graceful fallback
collection = builder.build_guides_from_examples(
examples=examples,

View File

@@ -328,7 +328,9 @@ class TestInstallToAllAgents:
def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path):
with patch(
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
results = install_to_all_agents(self.skill_dir, force=True)
assert len(results) == 11
@@ -357,7 +359,9 @@ class TestInstallToAllAgents:
def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path):
with patch(
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
# Without force - should fail
results_no_force = install_to_all_agents(self.skill_dir, force=False)
# All should fail because directories exist
@@ -400,7 +404,10 @@ class TestInstallAgentCLI:
def test_cli_help_output(self):
"""Test that --help shows usage information."""
with pytest.raises(SystemExit) as exc_info, patch("sys.argv", ["install_agent.py", "--help"]):
with (
pytest.raises(SystemExit) as exc_info,
patch("sys.argv", ["install_agent.py", "--help"]),
):
main()
# --help exits with code 0
@@ -422,8 +429,13 @@ class TestInstallAgentCLI:
def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path):
with patch("sys.argv", ["install_agent.py", str(self.skill_dir), "--agent", "claude", "--dry-run"]):
with patch(
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
with patch(
"sys.argv",
["install_agent.py", str(self.skill_dir), "--agent", "claude", "--dry-run"],
):
exit_code = main()
assert exit_code == 0
@@ -437,8 +449,13 @@ class TestInstallAgentCLI:
def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path):
with patch("sys.argv", ["install_agent.py", str(self.skill_dir), "--agent", "claude", "--force"]):
with patch(
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
with patch(
"sys.argv",
["install_agent.py", str(self.skill_dir), "--agent", "claude", "--force"],
):
exit_code = main()
assert exit_code == 0
@@ -454,8 +471,13 @@ class TestInstallAgentCLI:
def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path):
with patch("sys.argv", ["install_agent.py", str(self.skill_dir), "--agent", "all", "--force"]):
with patch(
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
with patch(
"sys.argv",
["install_agent.py", str(self.skill_dir), "--agent", "all", "--force"],
):
exit_code = main()
assert exit_code == 0

View File

@@ -23,7 +23,9 @@ class TestInstallCLI(unittest.TestCase):
# Create parser like install_skill.py does
parser = argparse.ArgumentParser()
parser.add_argument("--config", required=True)
parser.add_argument("--target", choices=["claude", "gemini", "openai", "markdown"], default="claude")
parser.add_argument(
"--target", choices=["claude", "gemini", "openai", "markdown"], default="claude"
)
# Test that each platform is accepted
for platform in ["claude", "gemini", "openai", "markdown"]:
@@ -43,7 +45,9 @@ class TestInstallCLI(unittest.TestCase):
parser = argparse.ArgumentParser()
parser.add_argument("--config", required=True)
parser.add_argument("--target", choices=["claude", "gemini", "openai", "markdown"], default="claude")
parser.add_argument(
"--target", choices=["claude", "gemini", "openai", "markdown"], default="claude"
)
# Should raise SystemExit for invalid target
with self.assertRaises(SystemExit):
@@ -62,7 +66,10 @@ class TestInstallToolMultiPlatform(unittest.IsolatedAsyncioTestCase):
for target in ["claude", "gemini", "openai"]:
# Use dry_run=True which skips actual execution
# It will still show us the platform is being recognized
with patch("builtins.open", create=True) as mock_open, patch("json.load") as mock_json_load:
with (
patch("builtins.open", create=True) as mock_open,
patch("json.load") as mock_json_load,
):
# Mock config file reading
mock_json_load.return_value = {"name": "test-skill"}
mock_file = MagicMock()

View File

@@ -44,7 +44,9 @@ class TestInstallSkillValidation:
@pytest.mark.asyncio
async def test_validation_both_configs(self):
"""Test error when both config_name and config_path provided"""
result = await install_skill_tool({"config_name": "react", "config_path": "configs/react.json"})
result = await install_skill_tool(
{"config_name": "react", "config_path": "configs/react.json"}
)
assert len(result) == 1
assert isinstance(result[0], TextContent)
@@ -114,7 +116,10 @@ class TestInstallSkillEnhancementMandatory:
# Verify enhancement phase is present
assert "AI Enhancement (MANDATORY)" in output
assert "Enhancement is REQUIRED for quality (3/10→9/10 boost)" in output or "REQUIRED for quality" in output
assert (
"Enhancement is REQUIRED for quality (3/10→9/10 boost)" in output
or "REQUIRED for quality" in output
)
# Verify it's not optional
assert "MANDATORY" in output
@@ -134,13 +139,23 @@ class TestInstallSkillPhaseOrchestration:
@patch("builtins.open")
@patch("os.environ.get")
async def test_full_workflow_with_fetch(
self, mock_env_get, mock_open, mock_upload, mock_package, mock_subprocess, mock_scrape, mock_fetch
self,
mock_env_get,
mock_open,
mock_upload,
mock_package,
mock_subprocess,
mock_scrape,
mock_fetch,
):
"""Test complete workflow when config_name is provided"""
# Mock fetch_config response
mock_fetch.return_value = [
TextContent(type="text", text="✅ Config fetched successfully\n\nConfig saved to: configs/react.json")
TextContent(
type="text",
text="✅ Config fetched successfully\n\nConfig saved to: configs/react.json",
)
]
# Mock config file read
@@ -159,7 +174,9 @@ class TestInstallSkillPhaseOrchestration:
mock_subprocess.return_value = ("✅ Enhancement complete", "", 0)
# Mock package response
mock_package.return_value = [TextContent(type="text", text="✅ Package complete\n\nSaved to: output/react.zip")]
mock_package.return_value = [
TextContent(type="text", text="✅ Package complete\n\nSaved to: output/react.zip")
]
# Mock upload response
mock_upload.return_value = [TextContent(type="text", text="✅ Upload successful")]
@@ -220,7 +237,9 @@ class TestInstallSkillPhaseOrchestration:
mock_env_get.return_value = ""
# Run the workflow
result = await install_skill_tool({"config_path": "configs/custom.json", "auto_upload": True})
result = await install_skill_tool(
{"config_path": "configs/custom.json", "auto_upload": True}
)
output = result[0].text
@@ -248,7 +267,9 @@ class TestInstallSkillErrorHandling:
"""Test handling of fetch phase failure"""
# Mock fetch failure
mock_fetch.return_value = [TextContent(type="text", text="❌ Failed to fetch config: Network error")]
mock_fetch.return_value = [
TextContent(type="text", text="❌ Failed to fetch config: Network error")
]
result = await install_skill_tool({"config_name": "react"})
@@ -271,7 +292,9 @@ class TestInstallSkillErrorHandling:
mock_open.return_value = mock_file
# Mock scrape failure
mock_scrape.return_value = [TextContent(type="text", text="❌ Scraping failed: Connection timeout")]
mock_scrape.return_value = [
TextContent(type="text", text="❌ Scraping failed: Connection timeout")
]
result = await install_skill_tool({"config_path": "configs/test.json"})
@@ -317,7 +340,9 @@ class TestInstallSkillOptions:
@pytest.mark.asyncio
async def test_no_upload_option(self):
"""Test that no_upload option skips upload phase"""
result = await install_skill_tool({"config_name": "react", "auto_upload": False, "dry_run": True})
result = await install_skill_tool(
{"config_name": "react", "auto_upload": False, "dry_run": True}
)
output = result[0].text
@@ -328,7 +353,9 @@ class TestInstallSkillOptions:
@pytest.mark.asyncio
async def test_unlimited_option(self):
"""Test that unlimited option is passed to scraper"""
result = await install_skill_tool({"config_path": "configs/react.json", "unlimited": True, "dry_run": True})
result = await install_skill_tool(
{"config_path": "configs/react.json", "unlimited": True, "dry_run": True}
)
output = result[0].text
@@ -338,7 +365,9 @@ class TestInstallSkillOptions:
@pytest.mark.asyncio
async def test_custom_destination(self):
"""Test custom destination directory"""
result = await install_skill_tool({"config_name": "react", "destination": "/tmp/skills", "dry_run": True})
result = await install_skill_tool(
{"config_name": "react", "destination": "/tmp/skills", "dry_run": True}
)
output = result[0].text

View File

@@ -95,7 +95,9 @@ class TestInstallSkillE2E:
return str(skill_dir)
@pytest.mark.asyncio
async def test_e2e_with_config_path_no_upload(self, test_config_file, tmp_path, mock_scrape_output):
async def test_e2e_with_config_path_no_upload(
self, test_config_file, tmp_path, mock_scrape_output
):
"""E2E test: config_path mode, no upload"""
# Mock the subprocess calls for scraping and enhancement
@@ -106,7 +108,10 @@ class TestInstallSkillE2E:
):
# Mock scrape_docs to return success
mock_scrape.return_value = [
TextContent(type="text", text=f"✅ Scraping complete\n\nSkill built at: {mock_scrape_output}")
TextContent(
type="text",
text=f"✅ Scraping complete\n\nSkill built at: {mock_scrape_output}",
)
]
# Mock enhancement subprocess (success)
@@ -114,7 +119,9 @@ class TestInstallSkillE2E:
# Mock package_skill to return success
zip_path = str(tmp_path / "output" / "test-e2e.zip")
mock_package.return_value = [TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")]
mock_package.return_value = [
TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")
]
# Run the tool
result = await install_skill_tool(
@@ -167,7 +174,10 @@ class TestInstallSkillE2E:
# Mock fetch_config to return success
config_path = str(tmp_path / "configs" / "react.json")
mock_fetch.return_value = [
TextContent(type="text", text=f"✅ Config fetched successfully\n\nConfig saved to: {config_path}")
TextContent(
type="text",
text=f"✅ Config fetched successfully\n\nConfig saved to: {config_path}",
)
]
# Mock config file read
@@ -178,7 +188,9 @@ class TestInstallSkillE2E:
# Mock scrape_docs
skill_dir = str(tmp_path / "output" / "react")
mock_scrape.return_value = [
TextContent(type="text", text=f"✅ Scraping complete\n\nSkill built at: {skill_dir}")
TextContent(
type="text", text=f"✅ Scraping complete\n\nSkill built at: {skill_dir}"
)
]
# Mock enhancement
@@ -186,7 +198,9 @@ class TestInstallSkillE2E:
# Mock package
zip_path = str(tmp_path / "output" / "react.zip")
mock_package.return_value = [TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")]
mock_package.return_value = [
TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")
]
# Mock env (no API key - should skip upload)
mock_env.return_value = ""
@@ -222,7 +236,9 @@ class TestInstallSkillE2E:
async def test_e2e_dry_run_mode(self, test_config_file):
"""E2E test: dry-run mode (no actual execution)"""
result = await install_skill_tool({"config_path": test_config_file, "auto_upload": False, "dry_run": True})
result = await install_skill_tool(
{"config_path": test_config_file, "auto_upload": False, "dry_run": True}
)
output = result[0].text
@@ -245,9 +261,13 @@ class TestInstallSkillE2E:
with patch("skill_seekers.mcp.server.scrape_docs_tool") as mock_scrape:
# Mock scrape failure
mock_scrape.return_value = [TextContent(type="text", text="❌ Scraping failed: Network timeout")]
mock_scrape.return_value = [
TextContent(type="text", text="❌ Scraping failed: Network timeout")
]
result = await install_skill_tool({"config_path": test_config_file, "auto_upload": False, "dry_run": False})
result = await install_skill_tool(
{"config_path": test_config_file, "auto_upload": False, "dry_run": False}
)
output = result[0].text
@@ -256,7 +276,9 @@ class TestInstallSkillE2E:
assert "WORKFLOW COMPLETE" not in output
@pytest.mark.asyncio
async def test_e2e_error_handling_enhancement_failure(self, test_config_file, mock_scrape_output):
async def test_e2e_error_handling_enhancement_failure(
self, test_config_file, mock_scrape_output
):
"""E2E test: error handling when enhancement fails"""
with (
@@ -265,13 +287,18 @@ class TestInstallSkillE2E:
):
# Mock successful scrape
mock_scrape.return_value = [
TextContent(type="text", text=f"✅ Scraping complete\n\nSkill built at: {mock_scrape_output}")
TextContent(
type="text",
text=f"✅ Scraping complete\n\nSkill built at: {mock_scrape_output}",
)
]
# Mock enhancement failure
mock_enhance.return_value = ("", "Enhancement error: Claude not found", 1)
result = await install_skill_tool({"config_path": test_config_file, "auto_upload": False, "dry_run": False})
result = await install_skill_tool(
{"config_path": test_config_file, "auto_upload": False, "dry_run": False}
)
output = result[0].text
@@ -311,7 +338,9 @@ class TestInstallSkillCLI_E2E:
# Import and call the tool directly (more reliable than subprocess)
from skill_seekers.mcp.server import install_skill_tool
result = await install_skill_tool({"config_path": test_config_file, "dry_run": True, "auto_upload": False})
result = await install_skill_tool(
{"config_path": test_config_file, "dry_run": True, "auto_upload": False}
)
# Verify output
output = result[0].text
@@ -324,7 +353,9 @@ class TestInstallSkillCLI_E2E:
# Run CLI without config
result = subprocess.run(
[sys.executable, "-m", "skill_seekers.cli.install_skill"], capture_output=True, text=True
[sys.executable, "-m", "skill_seekers.cli.install_skill"],
capture_output=True,
text=True,
)
# Should fail
@@ -337,7 +368,9 @@ class TestInstallSkillCLI_E2E:
"""E2E test: CLI help command"""
result = subprocess.run(
[sys.executable, "-m", "skill_seekers.cli.install_skill", "--help"], capture_output=True, text=True
[sys.executable, "-m", "skill_seekers.cli.install_skill", "--help"],
capture_output=True,
text=True,
)
# Should succeed
@@ -354,7 +387,9 @@ class TestInstallSkillCLI_E2E:
@patch("skill_seekers.mcp.server.scrape_docs_tool")
@patch("skill_seekers.mcp.server.run_subprocess_with_streaming")
@patch("skill_seekers.mcp.server.package_skill_tool")
async def test_cli_full_workflow_mocked(self, mock_package, mock_enhance, mock_scrape, test_config_file, tmp_path):
async def test_cli_full_workflow_mocked(
self, mock_package, mock_enhance, mock_scrape, test_config_file, tmp_path
):
"""E2E test: Full CLI workflow with mocked phases (via direct call)"""
# Setup mocks
@@ -366,7 +401,9 @@ class TestInstallSkillCLI_E2E:
mock_enhance.return_value = ("✅ Enhancement complete", "", 0)
zip_path = str(tmp_path / "output" / "test-cli-e2e.zip")
mock_package.return_value = [TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")]
mock_package.return_value = [
TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")
]
# Call the tool directly
from skill_seekers.mcp.server import install_skill_tool

View File

@@ -172,7 +172,9 @@ class TestRealConfigFiles(unittest.TestCase):
if os.path.exists(config_path):
config = load_config(config_path)
errors, _ = validate_config(config)
self.assertEqual(len(errors), 0, f"FastAPI config should be valid, got errors: {errors}")
self.assertEqual(
len(errors), 0, f"FastAPI config should be valid, got errors: {errors}"
)
def test_steam_economy_config(self):
"""Test Steam Economy config is valid"""
@@ -180,7 +182,9 @@ class TestRealConfigFiles(unittest.TestCase):
if os.path.exists(config_path):
config = load_config(config_path)
errors, _ = validate_config(config)
self.assertEqual(len(errors), 0, f"Steam Economy config should be valid, got errors: {errors}")
self.assertEqual(
len(errors), 0, f"Steam Economy config should be valid, got errors: {errors}"
)
class TestURLProcessing(unittest.TestCase):
@@ -221,7 +225,11 @@ class TestURLProcessing(unittest.TestCase):
config = {
"name": "test",
"base_url": "https://example.com/",
"start_urls": ["https://example.com/guide/", "https://example.com/api/", "https://example.com/tutorial/"],
"start_urls": [
"https://example.com/guide/",
"https://example.com/api/",
"https://example.com/tutorial/",
],
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"},
"rate_limit": 0.1,
"max_pages": 10,
@@ -423,14 +431,20 @@ app.use('*', cors())
# Verify llms.txt was detected
self.assertTrue(scraper.llms_txt_detected, "llms.txt should be detected")
self.assertEqual(scraper.llms_txt_variant, "explicit", "Should use explicit variant from config")
self.assertEqual(
scraper.llms_txt_variant, "explicit", "Should use explicit variant from config"
)
# Verify pages were parsed
self.assertGreater(len(scraper.pages), 0, "Should have parsed pages from llms.txt")
# Verify page structure
self.assertTrue(all("title" in page for page in scraper.pages), "All pages should have titles")
self.assertTrue(all("content" in page for page in scraper.pages), "All pages should have content")
self.assertTrue(
all("title" in page for page in scraper.pages), "All pages should have titles"
)
self.assertTrue(
all("content" in page for page in scraper.pages), "All pages should have content"
)
self.assertTrue(
any(len(page.get("code_samples", [])) > 0 for page in scraper.pages),
"At least one page should have code samples",

View File

@@ -51,7 +51,9 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase):
mock_content.type = "file"
mock_content.encoding = "none" # This is what GitHub API returns for large files
mock_content.size = 1388271
mock_content.download_url = "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
mock_content.download_url = (
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
)
with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config)
@@ -109,7 +111,9 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
def test_github_command_has_enhancement_flags(self):
"""E2E: Verify --enhance-local flag exists in github command help"""
result = subprocess.run(["skill-seekers", "github", "--help"], capture_output=True, text=True)
result = subprocess.run(
["skill-seekers", "github", "--help"], capture_output=True, text=True
)
# VERIFY: Command succeeds
self.assertEqual(result.returncode, 0, "github --help should succeed")
@@ -148,9 +152,20 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
from skill_seekers.cli import main
# Mock sys.argv to simulate CLI call
test_args = ["skill-seekers", "github", "--repo", "test/test", "--name", "test", "--enhance-local"]
test_args = [
"skill-seekers",
"github",
"--repo",
"test/test",
"--name",
"test",
"--enhance-local",
]
with patch("sys.argv", test_args), patch("skill_seekers.cli.github_scraper.main") as mock_github_main:
with (
patch("sys.argv", test_args),
patch("skill_seekers.cli.github_scraper.main") as mock_github_main,
):
mock_github_main.return_value = 0
# Call main dispatcher
@@ -165,9 +180,12 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
# VERIFY: sys.argv contains --enhance-local flag
# (main.py should have added it before calling github_scraper)
called_with_enhance = any("--enhance-local" in str(call) for call in mock_github_main.call_args_list)
called_with_enhance = any(
"--enhance-local" in str(call) for call in mock_github_main.call_args_list
)
self.assertTrue(
called_with_enhance or "--enhance-local" in sys.argv, "Flag should be forwarded to github_scraper"
called_with_enhance or "--enhance-local" in sys.argv,
"Flag should be forwarded to github_scraper",
)
@@ -203,7 +221,9 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
custom_url = "http://localhost:3000"
with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url}),
patch.dict(
os.environ, {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url}
),
patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
):
# Create enhancer
@@ -213,7 +233,11 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
mock_anthropic.assert_called_once()
call_kwargs = mock_anthropic.call_args[1]
self.assertIn("base_url", call_kwargs, "base_url should be passed")
self.assertEqual(call_kwargs["base_url"], custom_url, "base_url should match ANTHROPIC_BASE_URL env var")
self.assertEqual(
call_kwargs["base_url"],
custom_url,
"base_url should match ANTHROPIC_BASE_URL env var",
)
def test_anthropic_auth_token_support(self):
"""E2E: Verify ANTHROPIC_AUTH_TOKEN is accepted as alternative to ANTHROPIC_API_KEY"""
@@ -234,13 +258,17 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
# VERIFY: api_key set to ANTHROPIC_AUTH_TOKEN value
self.assertEqual(
enhancer.api_key, custom_token, "Should use ANTHROPIC_AUTH_TOKEN when ANTHROPIC_API_KEY not set"
enhancer.api_key,
custom_token,
"Should use ANTHROPIC_AUTH_TOKEN when ANTHROPIC_API_KEY not set",
)
# VERIFY: Anthropic client initialized with correct key
mock_anthropic.assert_called_once()
call_kwargs = mock_anthropic.call_args[1]
self.assertEqual(call_kwargs["api_key"], custom_token, "api_key should match ANTHROPIC_AUTH_TOKEN")
self.assertEqual(
call_kwargs["api_key"], custom_token, "api_key should match ANTHROPIC_AUTH_TOKEN"
)
def test_thinking_block_handling(self):
"""E2E: Verify ThinkingBlock doesn't cause .text AttributeError"""
@@ -284,7 +312,11 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
# VERIFY: Should find text from TextBlock, ignore ThinkingBlock
self.assertIsNotNone(result, "Should return enhanced content")
self.assertEqual(result, "# Enhanced SKILL.md\n\nContent here", "Should extract text from TextBlock")
self.assertEqual(
result,
"# Enhanced SKILL.md\n\nContent here",
"Should extract text from TextBlock",
)
class TestIssue219IntegrationAll(unittest.TestCase):
@@ -297,7 +329,9 @@ class TestIssue219IntegrationAll(unittest.TestCase):
# 2. Large files are downloaded
# 3. Custom API endpoints work
result = subprocess.run(["skill-seekers", "github", "--help"], capture_output=True, text=True)
result = subprocess.run(
["skill-seekers", "github", "--help"], capture_output=True, text=True
)
# All flags present
self.assertIn("--enhance", result.stdout)

View File

@@ -48,7 +48,9 @@ def test_url_parsing_with_complex_paths():
assert variants is not None
assert variants["url"] == "https://example.com/llms-full.txt"
mock_head.assert_called_with("https://example.com/llms-full.txt", timeout=5, allow_redirects=True)
mock_head.assert_called_with(
"https://example.com/llms-full.txt", timeout=5, allow_redirects=True
)
def test_detect_all_variants():

View File

@@ -133,7 +133,10 @@ def test_custom_max_retries():
"""Test custom max_retries parameter"""
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5)
with patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get, patch("time.sleep"):
with (
patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get,
patch("time.sleep"),
):
content = downloader.download()
assert content is None
@@ -189,7 +192,9 @@ def test_is_markdown_rejects_html_doctype():
"""Test that HTML with DOCTYPE is rejected (prevents redirect trap)"""
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
html = "<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>"
html = (
"<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>"
)
assert not downloader._is_markdown(html)
# Test case-insensitive

View File

@@ -93,7 +93,9 @@ plain code without language
- [HTML Page](./page.html)
- [External](https://google.com)
"""
result = self.converter._extract_markdown_content(content, "https://example.com/docs/test.md")
result = self.converter._extract_markdown_content(
content, "https://example.com/docs/test.md"
)
# Should only include .md links
md_links = [l for l in result["links"] if ".md" in l]
self.assertEqual(len(md_links), len(result["links"]))
@@ -115,7 +117,9 @@ Another paragraph that should be included in the final content output.
def test_detect_html_in_md_url(self):
"""Test that HTML content is detected when .md URL returns HTML."""
html_content = "<!DOCTYPE html><html><head><title>Page</title></head><body><h1>Hello</h1></body></html>"
result = self.converter._extract_markdown_content(html_content, "https://example.com/test.md")
result = self.converter._extract_markdown_content(
html_content, "https://example.com/test.md"
)
self.assertEqual(result["title"], "Page")

View File

@@ -67,7 +67,10 @@ def sample_config(temp_dirs):
"base_url": "https://test-framework.dev/",
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"},
"url_patterns": {"include": ["/docs/"], "exclude": ["/blog/", "/search/"]},
"categories": {"getting_started": ["introduction", "getting-started"], "api": ["api", "reference"]},
"categories": {
"getting_started": ["introduction", "getting-started"],
"api": ["api", "reference"],
},
"rate_limit": 0.5,
"max_pages": 100,
}
@@ -85,7 +88,12 @@ def unified_config(temp_dirs):
"description": "Test unified scraping",
"merge_mode": "rule-based",
"sources": [
{"type": "documentation", "base_url": "https://example.com/docs/", "extract_api": True, "max_pages": 10},
{
"type": "documentation",
"base_url": "https://example.com/docs/",
"extract_api": True,
"max_pages": 10,
},
{"type": "github", "repo": "test/repo", "extract_readme": True},
],
}
@@ -166,7 +174,11 @@ class TestConfigTools:
"""Test basic config generation."""
monkeypatch.chdir(temp_dirs["base"])
args = {"name": "my-framework", "url": "https://my-framework.dev/", "description": "My framework skill"}
args = {
"name": "my-framework",
"url": "https://my-framework.dev/",
"description": "My framework skill",
}
result = await server_fastmcp.generate_config(**args)
@@ -232,7 +244,9 @@ class TestConfigTools:
async def test_validate_config_missing_file(self, temp_dirs):
"""Test validating a non-existent config file."""
result = await server_fastmcp.validate_config(config_path=str(temp_dirs["config"] / "nonexistent.json"))
result = await server_fastmcp.validate_config(
config_path=str(temp_dirs["config"] / "nonexistent.json")
)
assert isinstance(result, str)
# Should indicate error
@@ -252,7 +266,9 @@ class TestScrapingTools:
async def test_estimate_pages_basic(self, sample_config):
"""Test basic page estimation."""
with patch("subprocess.run") as mock_run:
mock_run.return_value = Mock(returncode=0, stdout="Estimated pages: 150\nRecommended max_pages: 200")
mock_run.return_value = Mock(
returncode=0, stdout="Estimated pages: 150\nRecommended max_pages: 200"
)
result = await server_fastmcp.estimate_pages(config_path=str(sample_config))
@@ -266,7 +282,9 @@ class TestScrapingTools:
async def test_estimate_pages_custom_discovery(self, sample_config):
"""Test estimation with custom max_discovery."""
result = await server_fastmcp.estimate_pages(config_path=str(sample_config), max_discovery=500)
result = await server_fastmcp.estimate_pages(
config_path=str(sample_config), max_discovery=500
)
assert isinstance(result, str)
@@ -281,7 +299,9 @@ class TestScrapingTools:
async def test_scrape_docs_with_enhancement(self, sample_config):
"""Test scraping with local enhancement."""
result = await server_fastmcp.scrape_docs(config_path=str(sample_config), enhance_local=True, dry_run=True)
result = await server_fastmcp.scrape_docs(
config_path=str(sample_config), enhance_local=True, dry_run=True
)
assert isinstance(result, str)
@@ -310,7 +330,9 @@ class TestScrapingTools:
with patch("subprocess.run") as mock_run:
mock_run.return_value = Mock(returncode=0, stdout="GitHub scraping completed")
result = await server_fastmcp.scrape_github(repo="facebook/react", name="react-github-test")
result = await server_fastmcp.scrape_github(
repo="facebook/react", name="react-github-test"
)
assert isinstance(result, str)
@@ -325,7 +347,12 @@ class TestScrapingTools:
async def test_scrape_github_options(self):
"""Test GitHub scraping with various options."""
result = await server_fastmcp.scrape_github(
repo="test/repo", no_issues=True, no_changelog=True, no_releases=True, max_issues=50, scrape_only=True
repo="test/repo",
no_issues=True,
no_changelog=True,
no_releases=True,
max_issues=50,
scrape_only=True,
)
assert isinstance(result, str)
@@ -333,7 +360,11 @@ class TestScrapingTools:
async def test_scrape_pdf_basic(self, temp_dirs):
"""Test basic PDF scraping."""
# Create a dummy PDF config
pdf_config = {"name": "test-pdf", "pdf_path": "/path/to/test.pdf", "description": "Test PDF skill"}
pdf_config = {
"name": "test-pdf",
"pdf_path": "/path/to/test.pdf",
"description": "Test PDF skill",
}
config_path = temp_dirs["config"] / "test-pdf.json"
config_path.write_text(json.dumps(pdf_config))
@@ -343,7 +374,9 @@ class TestScrapingTools:
async def test_scrape_pdf_direct_path(self):
"""Test PDF scraping with direct path."""
result = await server_fastmcp.scrape_pdf(pdf_path="/path/to/manual.pdf", name="manual-skill")
result = await server_fastmcp.scrape_pdf(
pdf_path="/path/to/manual.pdf", name="manual-skill"
)
assert isinstance(result, str)
@@ -428,7 +461,9 @@ class TestPackagingTools:
async def test_upload_skill_missing_file(self, temp_dirs):
"""Test upload with missing file."""
result = await server_fastmcp.upload_skill(skill_zip=str(temp_dirs["output"] / "nonexistent.zip"))
result = await server_fastmcp.upload_skill(
skill_zip=str(temp_dirs["output"] / "nonexistent.zip")
)
assert isinstance(result, str)
@@ -438,7 +473,9 @@ class TestPackagingTools:
with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch:
mock_fetch.return_value = [Mock(text="Config fetched")]
result = await server_fastmcp.install_skill(config_name="react", destination="output", dry_run=True)
result = await server_fastmcp.install_skill(
config_name="react", destination="output", dry_run=True
)
assert isinstance(result, str)
@@ -458,7 +495,9 @@ class TestPackagingTools:
with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch:
mock_fetch.return_value = [Mock(text="Config fetched")]
result = await server_fastmcp.install_skill(config_name="react", unlimited=True, dry_run=True)
result = await server_fastmcp.install_skill(
config_name="react", unlimited=True, dry_run=True
)
assert isinstance(result, str)
@@ -467,7 +506,9 @@ class TestPackagingTools:
with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch:
mock_fetch.return_value = [Mock(text="Config fetched")]
result = await server_fastmcp.install_skill(config_name="react", auto_upload=False, dry_run=True)
result = await server_fastmcp.install_skill(
config_name="react", auto_upload=False, dry_run=True
)
assert isinstance(result, str)
@@ -484,7 +525,9 @@ class TestSplittingTools:
async def test_split_config_auto_strategy(self, sample_config):
"""Test config splitting with auto strategy."""
result = await server_fastmcp.split_config(config_path=str(sample_config), strategy="auto", dry_run=True)
result = await server_fastmcp.split_config(
config_path=str(sample_config), strategy="auto", dry_run=True
)
assert isinstance(result, str)
@@ -510,7 +553,9 @@ class TestSplittingTools:
(temp_dirs["config"] / "godot-scripting.json").write_text("{}")
(temp_dirs["config"] / "godot-physics.json").write_text("{}")
result = await server_fastmcp.generate_router(config_pattern=str(temp_dirs["config"] / "godot-*.json"))
result = await server_fastmcp.generate_router(
config_pattern=str(temp_dirs["config"] / "godot-*.json")
)
assert isinstance(result, str)
@@ -552,7 +597,9 @@ class TestSourceTools:
async def test_fetch_config_download_api(self, temp_dirs):
"""Test downloading specific config from API."""
result = await server_fastmcp.fetch_config(config_name="react", destination=str(temp_dirs["config"]))
result = await server_fastmcp.fetch_config(
config_name="react", destination=str(temp_dirs["config"])
)
assert isinstance(result, str)
@@ -565,7 +612,9 @@ class TestSourceTools:
async def test_fetch_config_from_git_url(self, temp_dirs):
"""Test fetching config from git URL."""
result = await server_fastmcp.fetch_config(
config_name="react", git_url="https://github.com/myorg/configs.git", destination=str(temp_dirs["config"])
config_name="react",
git_url="https://github.com/myorg/configs.git",
destination=str(temp_dirs["config"]),
)
assert isinstance(result, str)
@@ -612,13 +661,17 @@ class TestSourceTools:
"""Test submitting config as JSON string."""
config_json = json.dumps({"name": "my-framework", "base_url": "https://my-framework.dev/"})
result = await server_fastmcp.submit_config(config_json=config_json, testing_notes="Works great!")
result = await server_fastmcp.submit_config(
config_json=config_json, testing_notes="Works great!"
)
assert isinstance(result, str)
async def test_add_config_source_basic(self):
"""Test adding a config source."""
result = await server_fastmcp.add_config_source(name="team", git_url="https://github.com/myorg/configs.git")
result = await server_fastmcp.add_config_source(
name="team", git_url="https://github.com/myorg/configs.git"
)
assert isinstance(result, str)
@@ -706,7 +759,9 @@ class TestFastMCPIntegration:
async def test_workflow_split_router(self, sample_config, temp_dirs):
"""Test workflow: split config → generate router."""
# Step 1: Split config
result1 = await server_fastmcp.split_config(config_path=str(sample_config), strategy="category", dry_run=True)
result1 = await server_fastmcp.split_config(
config_path=str(sample_config), strategy="category", dry_run=True
)
assert isinstance(result1, str)
# Step 2: Generate router

View File

@@ -42,7 +42,11 @@ def mock_git_repo(temp_dirs):
(repo_path / ".git").mkdir()
# Create sample config files
react_config = {"name": "react", "description": "React framework", "base_url": "https://react.dev/"}
react_config = {
"name": "react",
"description": "React framework",
"base_url": "https://react.dev/",
}
(repo_path / "react.json").write_text(json.dumps(react_config, indent=2))
vue_config = {"name": "vue", "description": "Vue framework", "base_url": "https://vuejs.org/"}
@@ -65,8 +69,18 @@ class TestFetchConfigModes:
mock_response = MagicMock()
mock_response.json.return_value = {
"configs": [
{"name": "react", "category": "web-frameworks", "description": "React framework", "type": "single"},
{"name": "vue", "category": "web-frameworks", "description": "Vue framework", "type": "single"},
{
"name": "react",
"category": "web-frameworks",
"description": "React framework",
"type": "single",
},
{
"name": "vue",
"category": "web-frameworks",
"description": "Vue framework",
"type": "single",
},
],
"total": 2,
}
@@ -94,7 +108,10 @@ class TestFetchConfigModes:
}
mock_download_response = MagicMock()
mock_download_response.json.return_value = {"name": "react", "base_url": "https://react.dev/"}
mock_download_response.json.return_value = {
"name": "react",
"base_url": "https://react.dev/",
}
mock_client_instance = mock_client.return_value.__aenter__.return_value
mock_client_instance.get.side_effect = [mock_detail_response, mock_download_response]
@@ -149,7 +166,9 @@ class TestFetchConfigModes:
@patch("skill_seekers.mcp.server.GitConfigRepo")
@patch("skill_seekers.mcp.server.SourceManager")
async def test_fetch_config_source_mode(self, mock_source_manager_class, mock_git_repo_class, temp_dirs):
async def test_fetch_config_source_mode(
self, mock_source_manager_class, mock_git_repo_class, temp_dirs
):
"""Test Source mode - using named source from registry."""
from skill_seekers.mcp.server import fetch_config_tool
@@ -491,7 +510,9 @@ class TestCompleteWorkflow:
}
mock_sm_class.return_value = mock_sm
add_result = await add_config_source_tool({"name": "team", "git_url": "https://github.com/myorg/configs.git"})
add_result = await add_config_source_tool(
{"name": "team", "git_url": "https://github.com/myorg/configs.git"}
)
assert "" in add_result[0].text
# Step 2: Fetch config from source

View File

@@ -119,7 +119,11 @@ class TestGenerateConfigTool(unittest.IsolatedAsyncioTestCase):
async def test_generate_config_basic(self):
"""Test basic config generation"""
args = {"name": "test-framework", "url": "https://test-framework.dev/", "description": "Test framework skill"}
args = {
"name": "test-framework",
"url": "https://test-framework.dev/",
"description": "Test framework skill",
}
result = await skill_seeker_server.generate_config_tool(args)
@@ -564,7 +568,9 @@ class TestSubmitConfigTool(unittest.IsolatedAsyncioTestCase):
async def test_submit_config_requires_token(self):
"""Should error without GitHub token"""
args = {"config_json": '{"name": "test", "description": "Test", "base_url": "https://example.com"}'}
args = {
"config_json": '{"name": "test", "description": "Test", "base_url": "https://example.com"}'
}
result = await skill_seeker_server.submit_config_tool(args)
self.assertIn("GitHub token required", result[0].text)
@@ -577,7 +583,9 @@ class TestSubmitConfigTool(unittest.IsolatedAsyncioTestCase):
result = await skill_seeker_server.submit_config_tool(args)
self.assertIn("validation failed", result[0].text.lower())
# ConfigValidator detects missing config type (base_url/repo/pdf)
self.assertTrue("cannot detect" in result[0].text.lower() or "missing" in result[0].text.lower())
self.assertTrue(
"cannot detect" in result[0].text.lower() or "missing" in result[0].text.lower()
)
async def test_submit_config_validates_name_format(self):
"""Should reject invalid name characters"""
@@ -649,7 +657,9 @@ class TestSubmitConfigTool(unittest.IsolatedAsyncioTestCase):
async def test_submit_config_from_file_path(self):
"""Should accept config_path parameter"""
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump({"name": "testfile", "description": "From file", "base_url": "https://test.com/"}, f)
json.dump(
{"name": "testfile", "description": "From file", "base_url": "https://test.com/"}, f
)
temp_path = f.name
try:

View File

@@ -24,11 +24,29 @@ class TestIssueCategorization:
def test_categorize_issues_basic(self):
"""Test basic issue categorization."""
problems = [
{"title": "OAuth setup fails", "labels": ["bug", "oauth"], "number": 1, "state": "open", "comments": 10},
{"title": "Testing framework issue", "labels": ["testing"], "number": 2, "state": "open", "comments": 5},
{
"title": "OAuth setup fails",
"labels": ["bug", "oauth"],
"number": 1,
"state": "open",
"comments": 10,
},
{
"title": "Testing framework issue",
"labels": ["testing"],
"number": 2,
"state": "open",
"comments": 5,
},
]
solutions = [
{"title": "Fixed OAuth redirect", "labels": ["oauth"], "number": 3, "state": "closed", "comments": 3}
{
"title": "Fixed OAuth redirect",
"labels": ["oauth"],
"number": 3,
"state": "closed",
"comments": 3,
}
]
topics = ["oauth", "testing", "async"]
@@ -43,7 +61,13 @@ class TestIssueCategorization:
def test_categorize_issues_keyword_matching(self):
"""Test keyword matching in titles and labels."""
problems = [
{"title": "Database connection timeout", "labels": ["db"], "number": 1, "state": "open", "comments": 7}
{
"title": "Database connection timeout",
"labels": ["db"],
"number": 1,
"state": "open",
"comments": 7,
}
]
solutions = []
@@ -57,7 +81,13 @@ class TestIssueCategorization:
def test_categorize_issues_multi_keyword_topic(self):
"""Test topics with multiple keywords."""
problems = [
{"title": "Async API call fails", "labels": ["async", "api"], "number": 1, "state": "open", "comments": 8}
{
"title": "Async API call fails",
"labels": ["async", "api"],
"number": 1,
"state": "open",
"comments": 8,
}
]
solutions = []
@@ -71,7 +101,15 @@ class TestIssueCategorization:
def test_categorize_issues_no_match_goes_to_other(self):
"""Test that unmatched issues go to 'other' category."""
problems = [{"title": "Random issue", "labels": ["misc"], "number": 1, "state": "open", "comments": 5}]
problems = [
{
"title": "Random issue",
"labels": ["misc"],
"number": 1,
"state": "open",
"comments": 5,
}
]
solutions = []
topics = ["oauth", "testing"]
@@ -94,7 +132,10 @@ class TestHybridContent:
def test_generate_hybrid_content_basic(self):
"""Test basic hybrid content generation."""
api_data = {"apis": {"oauth_login": {"name": "oauth_login", "status": "matched"}}, "summary": {"total_apis": 1}}
api_data = {
"apis": {"oauth_login": {"name": "oauth_login", "status": "matched"}},
"summary": {"total_apis": 1},
}
github_docs = {
"readme": "# Project README",
@@ -103,12 +144,29 @@ class TestHybridContent:
}
github_insights = {
"metadata": {"stars": 1234, "forks": 56, "language": "Python", "description": "Test project"},
"metadata": {
"stars": 1234,
"forks": 56,
"language": "Python",
"description": "Test project",
},
"common_problems": [
{"title": "OAuth fails", "number": 42, "state": "open", "comments": 10, "labels": ["bug"]}
{
"title": "OAuth fails",
"number": 42,
"state": "open",
"comments": 10,
"labels": ["bug"],
}
],
"known_solutions": [
{"title": "Fixed OAuth", "number": 35, "state": "closed", "comments": 5, "labels": ["bug"]}
{
"title": "Fixed OAuth",
"number": 35,
"state": "closed",
"comments": 5,
"labels": ["bug"],
}
],
"top_labels": [{"label": "bug", "count": 10}, {"label": "enhancement", "count": 5}],
}
@@ -190,11 +248,23 @@ class TestIssueToAPIMatching:
apis = {"oauth_login": {"name": "oauth_login"}, "async_fetch": {"name": "async_fetch"}}
problems = [
{"title": "OAuth login fails", "number": 42, "state": "open", "comments": 10, "labels": ["bug", "oauth"]}
{
"title": "OAuth login fails",
"number": 42,
"state": "open",
"comments": 10,
"labels": ["bug", "oauth"],
}
]
solutions = [
{"title": "Fixed async fetch timeout", "number": 35, "state": "closed", "comments": 5, "labels": ["async"]}
{
"title": "Fixed async fetch timeout",
"number": 35,
"state": "closed",
"comments": 5,
"labels": ["async"],
}
]
issue_links = _match_issues_to_apis(apis, problems, solutions)
@@ -214,7 +284,13 @@ class TestIssueToAPIMatching:
apis = {"database_connect": {"name": "database_connect"}}
problems = [
{"title": "Random unrelated issue", "number": 1, "state": "open", "comments": 5, "labels": ["misc"]}
{
"title": "Random unrelated issue",
"number": 1,
"state": "open",
"comments": 5,
"labels": ["misc"],
}
]
issue_links = _match_issues_to_apis(apis, problems, [])
@@ -226,7 +302,15 @@ class TestIssueToAPIMatching:
"""Test matching with dotted API names."""
apis = {"module.oauth.login": {"name": "module.oauth.login"}}
problems = [{"title": "OAuth module fails", "number": 42, "state": "open", "comments": 10, "labels": ["oauth"]}]
problems = [
{
"title": "OAuth module fails",
"number": 42,
"state": "open",
"comments": 10,
"labels": ["oauth"],
}
]
issue_links = _match_issues_to_apis(apis, problems, [])
@@ -253,8 +337,12 @@ class TestRuleBasedMergerWithGitHubStreams:
)
insights_stream = InsightsStream(
metadata={"stars": 1234, "forks": 56, "language": "Python"},
common_problems=[{"title": "Bug 1", "number": 1, "state": "open", "comments": 10, "labels": ["bug"]}],
known_solutions=[{"title": "Fix 1", "number": 2, "state": "closed", "comments": 5, "labels": ["bug"]}],
common_problems=[
{"title": "Bug 1", "number": 1, "state": "open", "comments": 10, "labels": ["bug"]}
],
known_solutions=[
{"title": "Fix 1", "number": 2, "state": "closed", "comments": 5, "labels": ["bug"]}
],
top_labels=[{"label": "bug", "count": 10}],
)
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
@@ -277,7 +365,9 @@ class TestRuleBasedMergerWithGitHubStreams:
# Create three-stream data
code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme="# README", contributing=None, docs_files=[])
insights_stream = InsightsStream(metadata={"stars": 500}, common_problems=[], known_solutions=[], top_labels=[])
insights_stream = InsightsStream(
metadata={"stars": 500}, common_problems=[], known_solutions=[], top_labels=[]
)
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Create and run merger
@@ -331,7 +421,12 @@ class TestIntegration:
],
)
insights_stream = InsightsStream(
metadata={"stars": 2500, "forks": 123, "language": "Python", "description": "Test framework"},
metadata={
"stars": 2500,
"forks": 123,
"language": "Python",
"description": "Test framework",
},
common_problems=[
{
"title": "Installation fails on Windows",
@@ -349,7 +444,13 @@ class TestIntegration:
},
],
known_solutions=[
{"title": "Fixed config loading", "number": 130, "state": "closed", "comments": 8, "labels": ["bug"]},
{
"title": "Fixed config loading",
"number": 130,
"state": "closed",
"comments": 8,
"labels": ["bug"],
},
{
"title": "Resolved OAuth timeout",
"number": 125,

View File

@@ -114,8 +114,18 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
scraped_data = {
"documentation": [
{"source_id": "source_a", "base_url": "https://a.com", "total_pages": 5, "refs_dir": refs_dir1},
{"source_id": "source_b", "base_url": "https://b.com", "total_pages": 3, "refs_dir": refs_dir2},
{
"source_id": "source_a",
"base_url": "https://a.com",
"total_pages": 5,
"refs_dir": refs_dir1,
},
{
"source_id": "source_b",
"base_url": "https://b.com",
"total_pages": 3,
"refs_dir": refs_dir2,
},
],
"github": [],
"pdf": [],
@@ -139,7 +149,12 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
scraped_data = {
"documentation": [
{"source_id": "my_source", "base_url": "https://example.com", "total_pages": 10, "refs_dir": refs_dir}
{
"source_id": "my_source",
"base_url": "https://example.com",
"total_pages": 10,
"refs_dir": refs_dir,
}
],
"github": [],
"pdf": [],
@@ -148,7 +163,9 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
builder = UnifiedSkillBuilder(config, scraped_data)
builder._generate_docs_references(scraped_data["documentation"])
source_index = os.path.join(builder.skill_dir, "references", "documentation", "my_source", "index.md")
source_index = os.path.join(
builder.skill_dir, "references", "documentation", "my_source", "index.md"
)
self.assertTrue(os.path.exists(source_index))
with open(source_index) as f:
@@ -169,8 +186,18 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
scraped_data = {
"documentation": [
{"source_id": "docs_one", "base_url": "https://one.com", "total_pages": 10, "refs_dir": refs_dir1},
{"source_id": "docs_two", "base_url": "https://two.com", "total_pages": 20, "refs_dir": refs_dir2},
{
"source_id": "docs_one",
"base_url": "https://one.com",
"total_pages": 10,
"refs_dir": refs_dir1,
},
{
"source_id": "docs_two",
"base_url": "https://two.com",
"total_pages": 20,
"refs_dir": refs_dir2,
},
],
"github": [],
"pdf": [],
@@ -205,7 +232,12 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
scraped_data = {
"documentation": [
{"source_id": "test_source", "base_url": "https://test.com", "total_pages": 5, "refs_dir": refs_dir}
{
"source_id": "test_source",
"base_url": "https://test.com",
"total_pages": 5,
"refs_dir": refs_dir,
}
],
"github": [],
"pdf": [],
@@ -290,7 +322,9 @@ class TestUnifiedSkillBuilderGitHubReferences(unittest.TestCase):
builder = UnifiedSkillBuilder(config, scraped_data)
builder._generate_github_references(scraped_data["github"])
readme_path = os.path.join(builder.skill_dir, "references", "github", "test_myrepo", "README.md")
readme_path = os.path.join(
builder.skill_dir, "references", "github", "test_myrepo", "README.md"
)
self.assertTrue(os.path.exists(readme_path))
with open(readme_path) as f:
@@ -338,7 +372,9 @@ class TestUnifiedSkillBuilderGitHubReferences(unittest.TestCase):
builder = UnifiedSkillBuilder(config, scraped_data)
builder._generate_github_references(scraped_data["github"])
issues_path = os.path.join(builder.skill_dir, "references", "github", "test_repo", "issues.md")
issues_path = os.path.join(
builder.skill_dir, "references", "github", "test_repo", "issues.md"
)
self.assertTrue(os.path.exists(issues_path))
with open(issues_path) as f:
@@ -358,12 +394,22 @@ class TestUnifiedSkillBuilderGitHubReferences(unittest.TestCase):
{
"repo": "org/first",
"repo_id": "org_first",
"data": {"readme": "#", "issues": [], "releases": [], "repo_info": {"stars": 100}},
"data": {
"readme": "#",
"issues": [],
"releases": [],
"repo_info": {"stars": 100},
},
},
{
"repo": "org/second",
"repo_id": "org_second",
"data": {"readme": "#", "issues": [], "releases": [], "repo_info": {"stars": 50}},
"data": {
"readme": "#",
"issues": [],
"releases": [],
"repo_info": {"stars": 50},
},
},
],
"pdf": [],
@@ -406,7 +452,11 @@ class TestUnifiedSkillBuilderPdfReferences(unittest.TestCase):
scraped_data = {
"documentation": [],
"github": [],
"pdf": [{"path": "/path/to/doc1.pdf"}, {"path": "/path/to/doc2.pdf"}, {"path": "/path/to/doc3.pdf"}],
"pdf": [
{"path": "/path/to/doc1.pdf"},
{"path": "/path/to/doc2.pdf"},
{"path": "/path/to/doc3.pdf"},
],
}
builder = UnifiedSkillBuilder(config, scraped_data)

View File

@@ -41,7 +41,9 @@ class TestPackageSkill(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdir:
skill_dir = self.create_test_skill_directory(tmpdir)
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True)
success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success)
self.assertIsNotNone(zip_path)
@@ -54,7 +56,9 @@ class TestPackageSkill(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdir:
skill_dir = self.create_test_skill_directory(tmpdir)
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True)
success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success)
@@ -77,7 +81,9 @@ class TestPackageSkill(unittest.TestCase):
# Add a backup file
(skill_dir / "SKILL.md.backup").write_text("# Backup")
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True)
success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success)
@@ -88,7 +94,9 @@ class TestPackageSkill(unittest.TestCase):
def test_package_nonexistent_directory(self):
"""Test packaging a nonexistent directory"""
success, zip_path = package_skill("/nonexistent/path", open_folder_after=False, skip_quality_check=True)
success, zip_path = package_skill(
"/nonexistent/path", open_folder_after=False, skip_quality_check=True
)
self.assertFalse(success)
self.assertIsNone(zip_path)
@@ -99,7 +107,9 @@ class TestPackageSkill(unittest.TestCase):
skill_dir = Path(tmpdir) / "invalid-skill"
skill_dir.mkdir()
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True)
success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertFalse(success)
self.assertIsNone(zip_path)
@@ -118,7 +128,9 @@ class TestPackageSkill(unittest.TestCase):
(skill_dir / "scripts").mkdir()
(skill_dir / "assets").mkdir()
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True)
success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success)
# Zip should be in output directory, not inside skill directory
@@ -135,7 +147,9 @@ class TestPackageSkill(unittest.TestCase):
(skill_dir / "scripts").mkdir()
(skill_dir / "assets").mkdir()
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True)
success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success)
self.assertEqual(zip_path.name, "my-awesome-skill.zip")
@@ -149,7 +163,9 @@ class TestPackageSkillCLI(unittest.TestCase):
import subprocess
try:
result = subprocess.run(["skill-seekers", "package", "--help"], capture_output=True, text=True, timeout=5)
result = subprocess.run(
["skill-seekers", "package", "--help"], capture_output=True, text=True, timeout=5
)
# argparse may return 0 or 2 for --help
self.assertIn(result.returncode, [0, 2])
@@ -163,7 +179,9 @@ class TestPackageSkillCLI(unittest.TestCase):
import subprocess
try:
result = subprocess.run(["skill-seekers-package", "--help"], capture_output=True, text=True, timeout=5)
result = subprocess.run(
["skill-seekers-package", "--help"], capture_output=True, text=True, timeout=5
)
# argparse may return 0 or 2 for --help
self.assertIn(result.returncode, [0, 2])

View File

@@ -126,7 +126,9 @@ class TestPackageStructure:
def test_mcp_tools_init_file_exists(self):
"""Test that src/skill_seekers/mcp/tools/__init__.py exists."""
init_file = Path(__file__).parent.parent / "src" / "skill_seekers" / "mcp" / "tools" / "__init__.py"
init_file = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "mcp" / "tools" / "__init__.py"
)
assert init_file.exists(), "src/skill_seekers/mcp/tools/__init__.py not found"
def test_cli_init_has_docstring(self):

View File

@@ -108,7 +108,11 @@ class TestUnlimitedMode(unittest.TestCase):
def test_limited_mode_default(self):
"""Test default max_pages is limited"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": {"main_content": "article"}}
config = {
"name": "test",
"base_url": "https://example.com/",
"selectors": {"main_content": "article"},
}
with tempfile.TemporaryDirectory() as tmpdir:
os.chdir(tmpdir)
@@ -145,7 +149,11 @@ class TestRateLimiting(unittest.TestCase):
def test_rate_limit_default(self):
"""Test default rate_limit is 0.5"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": {"main_content": "article"}}
config = {
"name": "test",
"base_url": "https://example.com/",
"selectors": {"main_content": "article"},
}
with tempfile.TemporaryDirectory() as tmpdir:
os.chdir(tmpdir)

Some files were not shown because too many files have changed in this diff Show More