change max length

This commit is contained in:
Pablo Estevez
2026-01-17 17:48:15 +00:00
parent 97e597d9db
commit c33c6f9073
118 changed files with 3546 additions and 960 deletions

View File

@@ -274,19 +274,24 @@ class ConfigAnalyzer:
# Add source type tags # Add source type tags
if "base_url" in config_data or ( if "base_url" in config_data or (
config_type == "unified" and any(s.get("type") == "documentation" for s in config_data.get("sources", [])) config_type == "unified"
and any(s.get("type") == "documentation" for s in config_data.get("sources", []))
): ):
tags.add("documentation") tags.add("documentation")
if "repo" in config_data or ( if "repo" in config_data or (
config_type == "unified" and any(s.get("type") == "github" for s in config_data.get("sources", [])) config_type == "unified"
and any(s.get("type") == "github" for s in config_data.get("sources", []))
): ):
tags.add("github") tags.add("github")
if ( if (
"pdf" in config_data "pdf" in config_data
or "pdf_url" in config_data or "pdf_url" in config_data
or (config_type == "unified" and any(s.get("type") == "pdf" for s in config_data.get("sources", []))) or (
config_type == "unified"
and any(s.get("type") == "pdf" for s in config_data.get("sources", []))
)
): ):
tags.add("pdf") tags.add("pdf")

View File

@@ -58,7 +58,9 @@ async def root():
@app.get("/api/configs") @app.get("/api/configs")
async def list_configs(category: str | None = None, tag: str | None = None, type: str | None = None) -> dict[str, Any]: async def list_configs(
category: str | None = None, tag: str | None = None, type: str | None = None
) -> dict[str, Any]:
""" """
List all available configs with metadata List all available configs with metadata

View File

@@ -46,7 +46,13 @@ print()
print("**By Type:**") print("**By Type:**")
for conflict_type, count in summary["by_type"].items(): for conflict_type, count in summary["by_type"].items():
if count > 0: if count > 0:
emoji = "📖" if conflict_type == "missing_in_docs" else "💻" if conflict_type == "missing_in_code" else "⚠️" emoji = (
"📖"
if conflict_type == "missing_in_docs"
else "💻"
if conflict_type == "missing_in_code"
else "⚠️"
)
print(f" {emoji} {conflict_type}: {count}") print(f" {emoji} {conflict_type}: {count}")
print() print()
@@ -86,10 +92,14 @@ if high:
if conflict["code_info"]: if conflict["code_info"]:
print("\n**Implemented as**:") print("\n**Implemented as**:")
params = conflict["code_info"].get("parameters", []) params = conflict["code_info"].get("parameters", [])
param_str = ", ".join(f"{p['name']}: {p.get('type_hint', 'Any')}" for p in params if p["name"] != "self") param_str = ", ".join(
f"{p['name']}: {p.get('type_hint', 'Any')}" for p in params if p["name"] != "self"
)
print(f" Signature: {conflict['code_info']['name']}({param_str})") print(f" Signature: {conflict['code_info']['name']}({param_str})")
print(f" Return type: {conflict['code_info'].get('return_type', 'None')}") print(f" Return type: {conflict['code_info'].get('return_type', 'None')}")
print(f" Location: {conflict['code_info'].get('source', 'N/A')}:{conflict['code_info'].get('line', '?')}") print(
f" Location: {conflict['code_info'].get('source', 'N/A')}:{conflict['code_info'].get('line', '?')}"
)
print() print()
# Show medium severity # Show medium severity

View File

@@ -171,7 +171,7 @@ exclude_lines = [
] ]
[tool.ruff] [tool.ruff]
line-length = 120 line-length = 100
target-version = "py310" target-version = "py310"
src = ["src", "tests"] src = ["src", "tests"]

View File

@@ -67,7 +67,9 @@ def get_adaptor(platform: str, config: dict = None) -> SkillAdaptor:
if platform not in ADAPTORS: if platform not in ADAPTORS:
available = ", ".join(ADAPTORS.keys()) available = ", ".join(ADAPTORS.keys())
if not ADAPTORS: if not ADAPTORS:
raise ValueError(f"No adaptors are currently implemented. Platform '{platform}' is not available.") raise ValueError(
f"No adaptors are currently implemented. Platform '{platform}' is not available."
)
raise ValueError( raise ValueError(
f"Platform '{platform}' is not supported or not yet implemented. Available platforms: {available}" f"Platform '{platform}' is not supported or not yet implemented. Available platforms: {available}"
) )

View File

@@ -167,14 +167,28 @@ version: {metadata.version}
# Validate ZIP file # Validate ZIP file
package_path = Path(package_path) package_path = Path(package_path)
if not package_path.exists(): if not package_path.exists():
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"File not found: {package_path}",
}
if not package_path.suffix == ".zip": if not package_path.suffix == ".zip":
return {"success": False, "skill_id": None, "url": None, "message": f"Not a ZIP file: {package_path}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Not a ZIP file: {package_path}",
}
# Prepare API request # Prepare API request
api_url = self.DEFAULT_API_ENDPOINT api_url = self.DEFAULT_API_ENDPOINT
headers = {"x-api-key": api_key, "anthropic-version": "2023-06-01", "anthropic-beta": "skills-2025-10-02"} headers = {
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
"anthropic-beta": "skills-2025-10-02",
}
timeout = kwargs.get("timeout", 60) timeout = kwargs.get("timeout", 60)
@@ -231,7 +245,12 @@ version: {metadata.version}
except: except:
error_msg = f"HTTP {response.status_code}" error_msg = f"HTTP {response.status_code}"
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {error_msg}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Upload failed: {error_msg}",
}
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
return { return {
@@ -250,7 +269,12 @@ version: {metadata.version}
} }
except Exception as e: except Exception as e:
return {"success": False, "skill_id": None, "url": None, "message": f"Unexpected error: {str(e)}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Unexpected error: {str(e)}",
}
def validate_api_key(self, api_key: str) -> bool: def validate_api_key(self, api_key: str) -> bool:
""" """
@@ -363,7 +387,9 @@ version: {metadata.version}
print(f"❌ Error calling Claude API: {e}") print(f"❌ Error calling Claude API: {e}")
return False return False
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]: def _read_reference_files(
self, references_dir: Path, max_chars: int = 200000
) -> dict[str, str]:
""" """
Read reference markdown files from skill directory. Read reference markdown files from skill directory.

View File

@@ -169,10 +169,20 @@ See the references directory for complete documentation with examples and best p
# Validate package file FIRST # Validate package file FIRST
package_path = Path(package_path) package_path = Path(package_path)
if not package_path.exists(): if not package_path.exists():
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"File not found: {package_path}",
}
if not package_path.suffix == ".gz": if not package_path.suffix == ".gz":
return {"success": False, "skill_id": None, "url": None, "message": f"Not a tar.gz file: {package_path}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Not a tar.gz file: {package_path}",
}
# Check for google-generativeai library # Check for google-generativeai library
try: try:
@@ -210,7 +220,9 @@ See the references directory for complete documentation with examples and best p
} }
# Upload to Files API # Upload to Files API
uploaded_file = genai.upload_file(path=str(main_file), display_name=f"{package_path.stem}_instructions") uploaded_file = genai.upload_file(
path=str(main_file), display_name=f"{package_path.stem}_instructions"
)
# Upload reference files (if any) # Upload reference files (if any)
refs_dir = temp_path / "references" refs_dir = temp_path / "references"
@@ -230,7 +242,12 @@ See the references directory for complete documentation with examples and best p
} }
except Exception as e: except Exception as e:
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {str(e)}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Upload failed: {str(e)}",
}
def validate_api_key(self, api_key: str) -> bool: def validate_api_key(self, api_key: str) -> bool:
""" """
@@ -337,7 +354,9 @@ See the references directory for complete documentation with examples and best p
print(f"❌ Error calling Gemini API: {e}") print(f"❌ Error calling Gemini API: {e}")
return False return False
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]: def _read_reference_files(
self, references_dir: Path, max_chars: int = 200000
) -> dict[str, str]:
""" """
Read reference markdown files from skill directory. Read reference markdown files from skill directory.

View File

@@ -185,10 +185,20 @@ Always prioritize accuracy by consulting the attached documentation files before
# Validate package file FIRST # Validate package file FIRST
package_path = Path(package_path) package_path = Path(package_path)
if not package_path.exists(): if not package_path.exists():
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"File not found: {package_path}",
}
if not package_path.suffix == ".zip": if not package_path.suffix == ".zip":
return {"success": False, "skill_id": None, "url": None, "message": f"Not a ZIP file: {package_path}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Not a ZIP file: {package_path}",
}
# Check for openai library # Check for openai library
try: try:
@@ -254,7 +264,9 @@ Always prioritize accuracy by consulting the attached documentation files before
# Attach files to vector store # Attach files to vector store
if file_ids: if file_ids:
client.beta.vector_stores.files.create_batch(vector_store_id=vector_store.id, file_ids=file_ids) client.beta.vector_stores.files.create_batch(
vector_store_id=vector_store.id, file_ids=file_ids
)
# Create assistant # Create assistant
assistant = client.beta.assistants.create( assistant = client.beta.assistants.create(
@@ -273,7 +285,12 @@ Always prioritize accuracy by consulting the attached documentation files before
} }
except Exception as e: except Exception as e:
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {str(e)}"} return {
"success": False,
"skill_id": None,
"url": None,
"message": f"Upload failed: {str(e)}",
}
def validate_api_key(self, api_key: str) -> bool: def validate_api_key(self, api_key: str) -> bool:
""" """
@@ -389,7 +406,9 @@ Always prioritize accuracy by consulting the attached documentation files before
print(f"❌ Error calling OpenAI API: {e}") print(f"❌ Error calling OpenAI API: {e}")
return False return False
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]: def _read_reference_files(
self, references_dir: Path, max_chars: int = 200000
) -> dict[str, str]:
""" """
Read reference markdown files from skill directory. Read reference markdown files from skill directory.

View File

@@ -66,7 +66,9 @@ class AIEnhancer:
self.mode = "disabled" self.mode = "disabled"
self.enabled = False self.enabled = False
logger.info(" AI enhancement disabled (no API key found)") logger.info(" AI enhancement disabled (no API key found)")
logger.info(" Set ANTHROPIC_API_KEY to enable, or use 'skill-seekers enhance' for SKILL.md") logger.info(
" Set ANTHROPIC_API_KEY to enable, or use 'skill-seekers enhance' for SKILL.md"
)
return return
if self.mode == "api" and self.enabled: if self.mode == "api" and self.enabled:
@@ -86,7 +88,9 @@ class AIEnhancer:
# LOCAL mode requires Claude Code to be available # LOCAL mode requires Claude Code to be available
# For patterns/examples, this is less practical than API mode # For patterns/examples, this is less practical than API mode
logger.info(" LOCAL mode not yet supported for pattern/example enhancement") logger.info(" LOCAL mode not yet supported for pattern/example enhancement")
logger.info(" Use API mode (set ANTHROPIC_API_KEY) or 'skill-seekers enhance' for SKILL.md") logger.info(
" Use API mode (set ANTHROPIC_API_KEY) or 'skill-seekers enhance' for SKILL.md"
)
self.enabled = False self.enabled = False
def _call_claude(self, prompt: str, max_tokens: int = 1000) -> str | None: def _call_claude(self, prompt: str, max_tokens: int = 1000) -> str | None:
@@ -96,7 +100,9 @@ class AIEnhancer:
try: try:
response = self.client.messages.create( response = self.client.messages.create(
model="claude-sonnet-4-20250514", max_tokens=max_tokens, messages=[{"role": "user", "content": prompt}] model="claude-sonnet-4-20250514",
max_tokens=max_tokens,
messages=[{"role": "user", "content": prompt}],
) )
return response.content[0].text return response.content[0].text
except Exception as e: except Exception as e:

View File

@@ -94,7 +94,9 @@ class APIReferenceBuilder:
name_without_ext = basename.rsplit(".", 1)[0] if "." in basename else basename name_without_ext = basename.rsplit(".", 1)[0] if "." in basename else basename
return f"{name_without_ext}.md" return f"{name_without_ext}.md"
def _generate_file_reference(self, file_data: dict[str, Any], source_file: str, language: str) -> str: def _generate_file_reference(
self, file_data: dict[str, Any], source_file: str, language: str
) -> str:
""" """
Generate complete markdown reference for a single file. Generate complete markdown reference for a single file.
@@ -334,7 +336,9 @@ def main():
""" """
import argparse import argparse
parser = argparse.ArgumentParser(description="Generate API reference from code analysis results") parser = argparse.ArgumentParser(
description="Generate API reference from code analysis results"
)
parser.add_argument("input_file", help="Code analysis JSON file") parser.add_argument("input_file", help="Code analysis JSON file")
parser.add_argument("output_dir", help="Output directory for markdown files") parser.add_argument("output_dir", help="Output directory for markdown files")

View File

@@ -197,7 +197,9 @@ class ArchitecturalPatternDetector:
return detected return detected
def _detect_mvc(self, dirs: dict[str, int], files: list[dict], frameworks: list[str]) -> list[ArchitecturalPattern]: def _detect_mvc(
self, dirs: dict[str, int], files: list[dict], frameworks: list[str]
) -> list[ArchitecturalPattern]:
"""Detect MVC pattern""" """Detect MVC pattern"""
patterns = [] patterns = []
@@ -226,7 +228,9 @@ class ArchitecturalPatternDetector:
if len(components["Views"]) == 1: if len(components["Views"]) == 1:
evidence.append("Views directory with view files") evidence.append("Views directory with view files")
if "controller" in file_path and ("controllers/" in file_path or "/controller/" in file_path): if "controller" in file_path and (
"controllers/" in file_path or "/controller/" in file_path
):
components["Controllers"].append(file.get("file", "")) components["Controllers"].append(file.get("file", ""))
if len(components["Controllers"]) == 1: if len(components["Controllers"]) == 1:
evidence.append("Controllers directory with controller classes") evidence.append("Controllers directory with controller classes")
@@ -288,11 +292,15 @@ class ArchitecturalPatternDetector:
if "view" in file_path: if "view" in file_path:
components["Views"].append(file.get("file", "")) components["Views"].append(file.get("file", ""))
if "viewmodel" in file_path or any("viewmodel" in c.get("name", "").lower() for c in classes): if "viewmodel" in file_path or any(
"viewmodel" in c.get("name", "").lower() for c in classes
):
components["ViewModels"].append(file.get("file", "")) components["ViewModels"].append(file.get("file", ""))
if len(components["ViewModels"]) >= 2: if len(components["ViewModels"]) >= 2:
evidence.append(f"ViewModels directory with {len(components['ViewModels'])} ViewModel classes") evidence.append(
f"ViewModels directory with {len(components['ViewModels'])} ViewModel classes"
)
if len(components["Views"]) >= 2: if len(components["Views"]) >= 2:
evidence.append(f"Views directory with {len(components['Views'])} view files") evidence.append(f"Views directory with {len(components['Views'])} view files")
@@ -329,7 +337,9 @@ class ArchitecturalPatternDetector:
return patterns return patterns
def _detect_repository(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]: def _detect_repository(
self, dirs: dict[str, int], files: list[dict]
) -> list[ArchitecturalPattern]:
"""Detect Repository pattern""" """Detect Repository pattern"""
patterns = [] patterns = []
@@ -352,7 +362,9 @@ class ArchitecturalPatternDetector:
components["Repositories"].append(file.get("file", "")) components["Repositories"].append(file.get("file", ""))
if len(components["Repositories"]) >= 2: if len(components["Repositories"]) >= 2:
evidence.append(f"Repository pattern: {len(components['Repositories'])} repository classes") evidence.append(
f"Repository pattern: {len(components['Repositories'])} repository classes"
)
evidence.append("Repositories abstract data access logic") evidence.append("Repositories abstract data access logic")
patterns.append( patterns.append(
@@ -367,7 +379,9 @@ class ArchitecturalPatternDetector:
return patterns return patterns
def _detect_service_layer(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]: def _detect_service_layer(
self, dirs: dict[str, int], files: list[dict]
) -> list[ArchitecturalPattern]:
"""Detect Service Layer pattern""" """Detect Service Layer pattern"""
patterns = [] patterns = []
@@ -404,7 +418,9 @@ class ArchitecturalPatternDetector:
return patterns return patterns
def _detect_layered_architecture(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]: def _detect_layered_architecture(
self, dirs: dict[str, int], files: list[dict]
) -> list[ArchitecturalPattern]:
"""Detect Layered Architecture (3-tier, N-tier)""" """Detect Layered Architecture (3-tier, N-tier)"""
patterns = [] patterns = []
@@ -444,7 +460,9 @@ class ArchitecturalPatternDetector:
return patterns return patterns
def _detect_clean_architecture(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]: def _detect_clean_architecture(
self, dirs: dict[str, int], files: list[dict]
) -> list[ArchitecturalPattern]:
"""Detect Clean Architecture""" """Detect Clean Architecture"""
patterns = [] patterns = []

View File

@@ -150,7 +150,9 @@ class CodeAnalyzer:
is_method = any( is_method = any(
isinstance(parent, ast.ClassDef) isinstance(parent, ast.ClassDef)
for parent in ast.walk(tree) for parent in ast.walk(tree)
if hasattr(parent, "body") and isinstance(parent.body, list) and node in parent.body if hasattr(parent, "body")
and isinstance(parent.body, list)
and node in parent.body
) )
except (TypeError, AttributeError): except (TypeError, AttributeError):
# If body is not iterable or check fails, assume it's a top-level function # If body is not iterable or check fails, assume it's a top-level function
@@ -173,7 +175,9 @@ class CodeAnalyzer:
if isinstance(base, ast.Name): if isinstance(base, ast.Name):
bases.append(base.id) bases.append(base.id)
elif isinstance(base, ast.Attribute): elif isinstance(base, ast.Attribute):
bases.append(f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr) bases.append(
f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr
)
# Extract methods # Extract methods
methods = [] methods = []
@@ -186,7 +190,11 @@ class CodeAnalyzer:
docstring = ast.get_docstring(node) docstring = ast.get_docstring(node)
return ClassSignature( return ClassSignature(
name=node.name, base_classes=bases, methods=methods, docstring=docstring, line_number=node.lineno name=node.name,
base_classes=bases,
methods=methods,
docstring=docstring,
line_number=node.lineno,
) )
def _extract_python_function(self, node, is_method: bool = False) -> FunctionSignature: def _extract_python_function(self, node, is_method: bool = False) -> FunctionSignature:
@@ -209,7 +217,9 @@ class CodeAnalyzer:
param_idx = num_no_default + i param_idx = num_no_default + i
if param_idx < len(params): if param_idx < len(params):
try: try:
params[param_idx].default = ast.unparse(default) if hasattr(ast, "unparse") else str(default) params[param_idx].default = (
ast.unparse(default) if hasattr(ast, "unparse") else str(default)
)
except: except:
params[param_idx].default = "..." params[param_idx].default = "..."
@@ -719,7 +729,9 @@ class CodeAnalyzer:
# Distinguish XML doc comments (///) # Distinguish XML doc comments (///)
comment_type = "doc" if match.group(1).startswith("/") else "inline" comment_type = "doc" if match.group(1).startswith("/") else "inline"
comments.append({"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type}) comments.append(
{"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type}
)
# Multi-line comments (/* */) # Multi-line comments (/* */)
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL): for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
@@ -1325,9 +1337,7 @@ class CodeAnalyzer:
"""Extract PHP method signatures from class body.""" """Extract PHP method signatures from class body."""
methods = [] methods = []
method_pattern = ( method_pattern = r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
)
for match in re.finditer(method_pattern, class_body): for match in re.finditer(method_pattern, class_body):
method_name = match.group(1) method_name = match.group(1)
params_str = match.group(2) params_str = match.group(2)
@@ -1445,7 +1455,8 @@ def create_sprite(texture: str) -> Node2D:
for method in cls["methods"]: for method in cls["methods"]:
params = ", ".join( params = ", ".join(
[ [
f"{p['name']}: {p['type_hint']}" + (f" = {p['default']}" if p.get("default") else "") f"{p['name']}: {p['type_hint']}"
+ (f" = {p['default']}" if p.get("default") else "")
for p in method["parameters"] for p in method["parameters"]
] ]
) )

View File

@@ -301,7 +301,11 @@ def analyze_codebase(
# Only include files with actual analysis results # Only include files with actual analysis results
if analysis and (analysis.get("classes") or analysis.get("functions")): if analysis and (analysis.get("classes") or analysis.get("functions")):
results["files"].append( results["files"].append(
{"file": str(file_path.relative_to(directory)), "language": language, **analysis} {
"file": str(file_path.relative_to(directory)),
"language": language,
**analysis,
}
) )
analyzed_count += 1 analyzed_count += 1
@@ -441,7 +445,10 @@ def analyze_codebase(
# Create extractor # Create extractor
test_extractor = TestExampleExtractor( test_extractor = TestExampleExtractor(
min_confidence=0.5, max_per_file=10, languages=languages, enhance_with_ai=enhance_with_ai min_confidence=0.5,
max_per_file=10,
languages=languages,
enhance_with_ai=enhance_with_ai,
) )
# Extract examples from directory # Extract examples from directory
@@ -487,7 +494,11 @@ def analyze_codebase(
tutorials_dir = output_dir / "tutorials" tutorials_dir = output_dir / "tutorials"
# Get workflow examples from the example_report if available # Get workflow examples from the example_report if available
if "example_report" in locals() and example_report and example_report.total_examples > 0: if (
"example_report" in locals()
and example_report
and example_report.total_examples > 0
):
# Convert example_report to list of dicts for processing # Convert example_report to list of dicts for processing
examples_list = example_report.to_dict().get("examples", []) examples_list = example_report.to_dict().get("examples", [])
@@ -565,7 +576,9 @@ def analyze_codebase(
if "ai_enhancements" in result_dict: if "ai_enhancements" in result_dict:
insights = result_dict["ai_enhancements"].get("overall_insights", {}) insights = result_dict["ai_enhancements"].get("overall_insights", {})
if insights.get("security_issues_found"): if insights.get("security_issues_found"):
logger.info(f"🔐 Security issues found: {insights['security_issues_found']}") logger.info(
f"🔐 Security issues found: {insights['security_issues_found']}"
)
logger.info(f"📁 Saved to: {config_output}") logger.info(f"📁 Saved to: {config_output}")
else: else:
@@ -741,10 +754,14 @@ Use this skill when you need to:
refs_added = False refs_added = False
if build_api_reference and (output_dir / "api_reference").exists(): if build_api_reference and (output_dir / "api_reference").exists():
skill_content += "- **API Reference**: `references/api_reference/` - Complete API documentation\n" skill_content += (
"- **API Reference**: `references/api_reference/` - Complete API documentation\n"
)
refs_added = True refs_added = True
if build_dependency_graph and (output_dir / "dependencies").exists(): if build_dependency_graph and (output_dir / "dependencies").exists():
skill_content += "- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n" skill_content += (
"- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
)
refs_added = True refs_added = True
if detect_patterns and (output_dir / "patterns").exists(): if detect_patterns and (output_dir / "patterns").exists():
skill_content += "- **Patterns**: `references/patterns/` - Detected design patterns\n" skill_content += "- **Patterns**: `references/patterns/` - Detected design patterns\n"
@@ -753,7 +770,9 @@ Use this skill when you need to:
skill_content += "- **Examples**: `references/test_examples/` - Usage examples from tests\n" skill_content += "- **Examples**: `references/test_examples/` - Usage examples from tests\n"
refs_added = True refs_added = True
if extract_config_patterns and (output_dir / "config_patterns").exists(): if extract_config_patterns and (output_dir / "config_patterns").exists():
skill_content += "- **Configuration**: `references/config_patterns/` - Configuration patterns\n" skill_content += (
"- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
)
refs_added = True refs_added = True
if (output_dir / "architecture").exists(): if (output_dir / "architecture").exists():
skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n" skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n"
@@ -1057,12 +1076,21 @@ Examples:
) )
parser.add_argument("--directory", required=True, help="Directory to analyze") parser.add_argument("--directory", required=True, help="Directory to analyze")
parser.add_argument("--output", default="output/codebase/", help="Output directory (default: output/codebase/)")
parser.add_argument( parser.add_argument(
"--depth", choices=["surface", "deep", "full"], default="deep", help="Analysis depth (default: deep)" "--output", default="output/codebase/", help="Output directory (default: output/codebase/)"
)
parser.add_argument(
"--depth",
choices=["surface", "deep", "full"],
default="deep",
help="Analysis depth (default: deep)",
)
parser.add_argument(
"--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)"
)
parser.add_argument(
"--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)"
) )
parser.add_argument("--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)")
parser.add_argument("--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)")
parser.add_argument( parser.add_argument(
"--skip-api-reference", "--skip-api-reference",
action="store_true", action="store_true",

View File

@@ -320,9 +320,11 @@ def api_keys_menu():
if key: if key:
import os import os
env_var = {"anthropic": "ANTHROPIC_API_KEY", "google": "GOOGLE_API_KEY", "openai": "OPENAI_API_KEY"}[ env_var = {
provider "anthropic": "ANTHROPIC_API_KEY",
] "google": "GOOGLE_API_KEY",
"openai": "OPENAI_API_KEY",
}[provider]
if os.getenv(env_var): if os.getenv(env_var):
source = " (from environment)" source = " (from environment)"
else: else:
@@ -389,7 +391,9 @@ def rate_limit_settings():
print(f" • Show countdown: {current['show_countdown']}\n") print(f" • Show countdown: {current['show_countdown']}\n")
# Timeout # Timeout
timeout_input = input(f"Default timeout in minutes [{current['default_timeout_minutes']}]: ").strip() timeout_input = input(
f"Default timeout in minutes [{current['default_timeout_minutes']}]: "
).strip()
if timeout_input: if timeout_input:
try: try:
config.config["rate_limit"]["default_timeout_minutes"] = int(timeout_input) config.config["rate_limit"]["default_timeout_minutes"] = int(timeout_input)
@@ -398,13 +402,17 @@ def rate_limit_settings():
# Auto-switch # Auto-switch
auto_switch_input = ( auto_switch_input = (
input(f"Auto-switch to other profiles? [y/n] ({current['auto_switch_profiles']}): ").strip().lower() input(f"Auto-switch to other profiles? [y/n] ({current['auto_switch_profiles']}): ")
.strip()
.lower()
) )
if auto_switch_input: if auto_switch_input:
config.config["rate_limit"]["auto_switch_profiles"] = auto_switch_input in ["y", "yes"] config.config["rate_limit"]["auto_switch_profiles"] = auto_switch_input in ["y", "yes"]
# Show countdown # Show countdown
countdown_input = input(f"Show countdown timer? [y/n] ({current['show_countdown']}): ").strip().lower() countdown_input = (
input(f"Show countdown timer? [y/n] ({current['show_countdown']}): ").strip().lower()
)
if countdown_input: if countdown_input:
config.config["rate_limit"]["show_countdown"] = countdown_input in ["y", "yes"] config.config["rate_limit"]["show_countdown"] = countdown_input in ["y", "yes"]
@@ -427,7 +435,9 @@ def resume_settings():
print(f" • Keep progress for: {current['keep_progress_days']} days\n") print(f" • Keep progress for: {current['keep_progress_days']} days\n")
# Auto-save interval # Auto-save interval
interval_input = input(f"Auto-save interval in seconds [{current['auto_save_interval_seconds']}]: ").strip() interval_input = input(
f"Auto-save interval in seconds [{current['auto_save_interval_seconds']}]: "
).strip()
if interval_input: if interval_input:
try: try:
config.config["resume"]["auto_save_interval_seconds"] = int(interval_input) config.config["resume"]["auto_save_interval_seconds"] = int(interval_input)
@@ -435,7 +445,9 @@ def resume_settings():
print("⚠️ Invalid input, keeping current value") print("⚠️ Invalid input, keeping current value")
# Keep days # Keep days
days_input = input(f"Keep progress for how many days [{current['keep_progress_days']}]: ").strip() days_input = input(
f"Keep progress for how many days [{current['keep_progress_days']}]: "
).strip()
if days_input: if days_input:
try: try:
config.config["resume"]["keep_progress_days"] = int(days_input) config.config["resume"]["keep_progress_days"] = int(days_input)
@@ -467,7 +479,9 @@ def test_connections():
token = config.config["github"]["profiles"][p["name"]]["token"] token = config.config["github"]["profiles"][p["name"]]["token"]
try: try:
response = requests.get( response = requests.get(
"https://api.github.com/rate_limit", headers={"Authorization": f"token {token}"}, timeout=5 "https://api.github.com/rate_limit",
headers={"Authorization": f"token {token}"},
timeout=5,
) )
if response.status_code == 200: if response.status_code == 200:
data = response.json() data = response.json()

View File

@@ -136,7 +136,9 @@ class ConfigEnhancer:
# Call Claude API # Call Claude API
logger.info("📡 Calling Claude API for config analysis...") logger.info("📡 Calling Claude API for config analysis...")
response = self.client.messages.create( response = self.client.messages.create(
model="claude-sonnet-4-20250514", max_tokens=8000, messages=[{"role": "user", "content": prompt}] model="claude-sonnet-4-20250514",
max_tokens=8000,
messages=[{"role": "user", "content": prompt}],
) )
# Parse response # Parse response
@@ -157,7 +159,9 @@ class ConfigEnhancer:
for cf in config_files[:10]: # Limit to first 10 files for cf in config_files[:10]: # Limit to first 10 files
settings_summary = [] settings_summary = []
for setting in cf.get("settings", [])[:5]: # First 5 settings per file for setting in cf.get("settings", [])[:5]: # First 5 settings per file
settings_summary.append(f" - {setting['key']}: {setting['value']} ({setting['value_type']})") settings_summary.append(
f" - {setting['key']}: {setting['value']} ({setting['value_type']})"
)
config_summary.append(f""" config_summary.append(f"""
File: {cf["relative_path"]} ({cf["config_type"]}) File: {cf["relative_path"]} ({cf["config_type"]})
@@ -221,7 +225,9 @@ Focus on actionable insights that help developers understand and improve their c
original_result["ai_enhancements"] = enhancements original_result["ai_enhancements"] = enhancements
# Add enhancement flags to config files # Add enhancement flags to config files
file_enhancements = {e["file_path"]: e for e in enhancements.get("file_enhancements", [])} file_enhancements = {
e["file_path"]: e for e in enhancements.get("file_enhancements", [])
}
for cf in original_result.get("config_files", []): for cf in original_result.get("config_files", []):
file_path = cf.get("relative_path", cf.get("file_path")) file_path = cf.get("relative_path", cf.get("file_path"))
if file_path in file_enhancements: if file_path in file_enhancements:
@@ -385,9 +391,14 @@ def main():
parser = argparse.ArgumentParser(description="AI-enhance configuration extraction results") parser = argparse.ArgumentParser(description="AI-enhance configuration extraction results")
parser.add_argument("result_file", help="Path to config extraction JSON result file") parser.add_argument("result_file", help="Path to config extraction JSON result file")
parser.add_argument( parser.add_argument(
"--mode", choices=["auto", "api", "local"], default="auto", help="Enhancement mode (default: auto)" "--mode",
choices=["auto", "api", "local"],
default="auto",
help="Enhancement mode (default: auto)",
)
parser.add_argument(
"--output", help="Output file for enhanced results (default: <input>_enhanced.json)"
) )
parser.add_argument("--output", help="Output file for enhanced results (default: <input>_enhanced.json)")
args = parser.parse_args() args = parser.parse_args()

View File

@@ -63,7 +63,9 @@ class ConfigFile:
file_path: str file_path: str
relative_path: str relative_path: str
config_type: Literal["json", "yaml", "toml", "env", "ini", "python", "javascript", "dockerfile", "docker-compose"] config_type: Literal[
"json", "yaml", "toml", "env", "ini", "python", "javascript", "dockerfile", "docker-compose"
]
purpose: str # Inferred purpose: database, api, logging, etc. purpose: str # Inferred purpose: database, api, logging, etc.
settings: list[ConfigSetting] = field(default_factory=list) settings: list[ConfigSetting] = field(default_factory=list)
patterns: list[str] = field(default_factory=list) patterns: list[str] = field(default_factory=list)
@@ -156,11 +158,23 @@ class ConfigFileDetector:
CONFIG_PATTERNS = { CONFIG_PATTERNS = {
"json": { "json": {
"patterns": ["*.json", "package.json", "tsconfig.json", "jsconfig.json"], "patterns": ["*.json", "package.json", "tsconfig.json", "jsconfig.json"],
"names": ["config.json", "settings.json", "app.json", ".eslintrc.json", ".prettierrc.json"], "names": [
"config.json",
"settings.json",
"app.json",
".eslintrc.json",
".prettierrc.json",
],
}, },
"yaml": { "yaml": {
"patterns": ["*.yaml", "*.yml"], "patterns": ["*.yaml", "*.yml"],
"names": ["config.yml", "settings.yml", ".travis.yml", ".gitlab-ci.yml", "docker-compose.yml"], "names": [
"config.yml",
"settings.yml",
".travis.yml",
".gitlab-ci.yml",
"docker-compose.yml",
],
}, },
"toml": { "toml": {
"patterns": ["*.toml"], "patterns": ["*.toml"],
@@ -498,7 +512,9 @@ class ConfigParser:
key = match.group(1) key = match.group(1)
value = match.group(3) if len(match.groups()) > 2 else match.group(2) value = match.group(3) if len(match.groups()) > 2 else match.group(2)
setting = ConfigSetting(key=key, value=value, value_type=self._infer_type(value)) setting = ConfigSetting(
key=key, value=value, value_type=self._infer_type(value)
)
config_file.settings.append(setting) config_file.settings.append(setting)
def _parse_dockerfile(self, config_file: ConfigFile): def _parse_dockerfile(self, config_file: ConfigFile):
@@ -514,7 +530,10 @@ class ConfigParser:
if len(parts) == 2: if len(parts) == 2:
key, value = parts key, value = parts
setting = ConfigSetting( setting = ConfigSetting(
key=key.strip(), value=value.strip(), value_type="string", env_var=key.strip() key=key.strip(),
value=value.strip(),
value_type="string",
env_var=key.strip(),
) )
config_file.settings.append(setting) config_file.settings.append(setting)
@@ -527,7 +546,9 @@ class ConfigParser:
setting = ConfigSetting(key=key, value=value, value_type="string") setting = ConfigSetting(key=key, value=value, value_type="string")
config_file.settings.append(setting) config_file.settings.append(setting)
def _extract_settings_from_dict(self, data: dict, config_file: ConfigFile, parent_path: list[str] = None): def _extract_settings_from_dict(
self, data: dict, config_file: ConfigFile, parent_path: list[str] = None
):
"""Recursively extract settings from dictionary""" """Recursively extract settings from dictionary"""
if parent_path is None: if parent_path is None:
parent_path = [] parent_path = []
@@ -636,7 +657,9 @@ class ConfigPatternDetector:
if matches >= min_match: if matches >= min_match:
detected.append(pattern_name) detected.append(pattern_name)
logger.debug(f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)") logger.debug(
f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)"
)
return detected return detected
@@ -649,7 +672,9 @@ class ConfigExtractor:
self.parser = ConfigParser() self.parser = ConfigParser()
self.pattern_detector = ConfigPatternDetector() self.pattern_detector = ConfigPatternDetector()
def extract_from_directory(self, directory: Path, max_files: int = 100) -> ConfigExtractionResult: def extract_from_directory(
self, directory: Path, max_files: int = 100
) -> ConfigExtractionResult:
""" """
Extract configuration patterns from directory. Extract configuration patterns from directory.
@@ -695,7 +720,9 @@ class ConfigExtractor:
logger.error(error_msg) logger.error(error_msg)
result.errors.append(error_msg) result.errors.append(error_msg)
logger.info(f"Extracted {result.total_settings} settings from {result.total_files} config files") logger.info(
f"Extracted {result.total_settings} settings from {result.total_files} config files"
)
logger.info(f"Detected patterns: {list(result.detected_patterns.keys())}") logger.info(f"Detected patterns: {list(result.detected_patterns.keys())}")
return result return result
@@ -741,12 +768,18 @@ def main():
) )
parser.add_argument("directory", type=Path, help="Directory to analyze") parser.add_argument("directory", type=Path, help="Directory to analyze")
parser.add_argument("--output", "-o", type=Path, help="Output JSON file") parser.add_argument("--output", "-o", type=Path, help="Output JSON file")
parser.add_argument("--max-files", type=int, default=100, help="Maximum config files to process")
parser.add_argument( parser.add_argument(
"--enhance", action="store_true", help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)" "--max-files", type=int, default=100, help="Maximum config files to process"
) )
parser.add_argument( parser.add_argument(
"--enhance-local", action="store_true", help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)" "--enhance",
action="store_true",
help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)",
)
parser.add_argument(
"--enhance-local",
action="store_true",
help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)",
) )
parser.add_argument( parser.add_argument(
"--ai-mode", "--ai-mode",

View File

@@ -27,7 +27,11 @@ class ConfigManager:
DEFAULT_CONFIG = { DEFAULT_CONFIG = {
"version": "1.0", "version": "1.0",
"github": {"default_profile": None, "profiles": {}}, "github": {"default_profile": None, "profiles": {}},
"rate_limit": {"default_timeout_minutes": 30, "auto_switch_profiles": True, "show_countdown": True}, "rate_limit": {
"default_timeout_minutes": 30,
"auto_switch_profiles": True,
"show_countdown": True,
},
"resume": {"auto_save_interval_seconds": 60, "keep_progress_days": 7}, "resume": {"auto_save_interval_seconds": 60, "keep_progress_days": 7},
"api_keys": {"anthropic": None, "google": None, "openai": None}, "api_keys": {"anthropic": None, "google": None, "openai": None},
"first_run": {"completed": False, "version": "2.7.0"}, "first_run": {"completed": False, "version": "2.7.0"},
@@ -161,7 +165,9 @@ class ConfigManager:
return profiles return profiles
def get_github_token(self, profile_name: str | None = None, repo_url: str | None = None) -> str | None: def get_github_token(
self, profile_name: str | None = None, repo_url: str | None = None
) -> str | None:
""" """
Get GitHub token with smart fallback chain. Get GitHub token with smart fallback chain.
@@ -269,7 +275,11 @@ class ConfigManager:
2. Config file 2. Config file
""" """
# Check environment first # Check environment first
env_map = {"anthropic": "ANTHROPIC_API_KEY", "google": "GOOGLE_API_KEY", "openai": "OPENAI_API_KEY"} env_map = {
"anthropic": "ANTHROPIC_API_KEY",
"google": "GOOGLE_API_KEY",
"openai": "OPENAI_API_KEY",
}
env_var = env_map.get(provider) env_var = env_map.get(provider)
if env_var: if env_var:

View File

@@ -112,7 +112,9 @@ class ConfigValidator:
# Validate merge_mode (optional) # Validate merge_mode (optional)
merge_mode = self.config.get("merge_mode", "rule-based") merge_mode = self.config.get("merge_mode", "rule-based")
if merge_mode not in self.VALID_MERGE_MODES: if merge_mode not in self.VALID_MERGE_MODES:
raise ValueError(f"Invalid merge_mode: '{merge_mode}'. Must be one of {self.VALID_MERGE_MODES}") raise ValueError(
f"Invalid merge_mode: '{merge_mode}'. Must be one of {self.VALID_MERGE_MODES}"
)
# Validate each source # Validate each source
for i, source in enumerate(sources): for i, source in enumerate(sources):
@@ -130,7 +132,9 @@ class ConfigValidator:
source_type = source["type"] source_type = source["type"]
if source_type not in self.VALID_SOURCE_TYPES: if source_type not in self.VALID_SOURCE_TYPES:
raise ValueError(f"Source {index}: Invalid type '{source_type}'. Must be one of {self.VALID_SOURCE_TYPES}") raise ValueError(
f"Source {index}: Invalid type '{source_type}'. Must be one of {self.VALID_SOURCE_TYPES}"
)
# Type-specific validation # Type-specific validation
if source_type == "documentation": if source_type == "documentation":
@@ -147,7 +151,9 @@ class ConfigValidator:
# Optional but recommended fields # Optional but recommended fields
if "selectors" not in source: if "selectors" not in source:
logger.warning(f"Source {index} (documentation): No 'selectors' specified, using defaults") logger.warning(
f"Source {index} (documentation): No 'selectors' specified, using defaults"
)
if "max_pages" in source and not isinstance(source["max_pages"], int): if "max_pages" in source and not isinstance(source["max_pages"], int):
raise ValueError(f"Source {index} (documentation): 'max_pages' must be an integer") raise ValueError(f"Source {index} (documentation): 'max_pages' must be an integer")
@@ -178,8 +184,12 @@ class ConfigValidator:
raise ValueError(f"Source {index} (github): 'max_issues' must be an integer") raise ValueError(f"Source {index} (github): 'max_issues' must be an integer")
# Validate enable_codebase_analysis if specified (C3.5) # Validate enable_codebase_analysis if specified (C3.5)
if "enable_codebase_analysis" in source and not isinstance(source["enable_codebase_analysis"], bool): if "enable_codebase_analysis" in source and not isinstance(
raise ValueError(f"Source {index} (github): 'enable_codebase_analysis' must be a boolean") source["enable_codebase_analysis"], bool
):
raise ValueError(
f"Source {index} (github): 'enable_codebase_analysis' must be a boolean"
)
# Validate ai_mode if specified (C3.5) # Validate ai_mode if specified (C3.5)
if "ai_mode" in source: if "ai_mode" in source:
@@ -249,7 +259,10 @@ class ConfigValidator:
"description": self.config.get("description", "Documentation skill"), "description": self.config.get("description", "Documentation skill"),
"merge_mode": "rule-based", "merge_mode": "rule-based",
"sources": [ "sources": [
{"type": "documentation", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}} {
"type": "documentation",
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
}
], ],
} }
return unified return unified
@@ -261,7 +274,10 @@ class ConfigValidator:
"description": self.config.get("description", "GitHub repository skill"), "description": self.config.get("description", "GitHub repository skill"),
"merge_mode": "rule-based", "merge_mode": "rule-based",
"sources": [ "sources": [
{"type": "github", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}} {
"type": "github",
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
}
], ],
} }
return unified return unified
@@ -272,7 +288,12 @@ class ConfigValidator:
"name": self.config.get("name", "unnamed"), "name": self.config.get("name", "unnamed"),
"description": self.config.get("description", "PDF document skill"), "description": self.config.get("description", "PDF document skill"),
"merge_mode": "rule-based", "merge_mode": "rule-based",
"sources": [{"type": "pdf", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}}], "sources": [
{
"type": "pdf",
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
}
],
} }
return unified return unified
@@ -312,11 +333,13 @@ class ConfigValidator:
return False return False
has_docs_api = any( has_docs_api = any(
s.get("type") == "documentation" and s.get("extract_api", True) for s in self.config["sources"] s.get("type") == "documentation" and s.get("extract_api", True)
for s in self.config["sources"]
) )
has_github_code = any( has_github_code = any(
s.get("type") == "github" and s.get("include_code", False) for s in self.config["sources"] s.get("type") == "github" and s.get("include_code", False)
for s in self.config["sources"]
) )
return has_docs_api and has_github_code return has_docs_api and has_github_code

View File

@@ -451,7 +451,12 @@ class ConflictDetector:
} }
# Count by type # Count by type
for conflict_type in ["missing_in_docs", "missing_in_code", "signature_mismatch", "description_mismatch"]: for conflict_type in [
"missing_in_docs",
"missing_in_code",
"signature_mismatch",
"description_mismatch",
]:
count = sum(1 for c in conflicts if c.type == conflict_type) count = sum(1 for c in conflicts if c.type == conflict_type)
summary["by_type"][conflict_type] = count summary["by_type"][conflict_type] = count
@@ -470,7 +475,10 @@ class ConflictDetector:
conflicts: List of Conflict objects conflicts: List of Conflict objects
output_path: Path to output JSON file output_path: Path to output JSON file
""" """
data = {"conflicts": [asdict(c) for c in conflicts], "summary": self.generate_summary(conflicts)} data = {
"conflicts": [asdict(c) for c in conflicts],
"summary": self.generate_summary(conflicts),
}
with open(output_path, "w", encoding="utf-8") as f: with open(output_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False) json.dump(data, f, indent=2, ensure_ascii=False)

View File

@@ -86,7 +86,9 @@ class DependencyAnalyzer:
def __init__(self): def __init__(self):
"""Initialize dependency analyzer.""" """Initialize dependency analyzer."""
if not NETWORKX_AVAILABLE: if not NETWORKX_AVAILABLE:
raise ImportError("NetworkX is required for dependency analysis. Install with: pip install networkx") raise ImportError(
"NetworkX is required for dependency analysis. Install with: pip install networkx"
)
self.graph = nx.DiGraph() # Directed graph for dependencies self.graph = nx.DiGraph() # Directed graph for dependencies
self.file_dependencies: dict[str, list[DependencyInfo]] = {} self.file_dependencies: dict[str, list[DependencyInfo]] = {}
@@ -130,7 +132,9 @@ class DependencyAnalyzer:
# Create file node # Create file node
imported_modules = [dep.imported_module for dep in deps] imported_modules = [dep.imported_module for dep in deps]
self.file_nodes[file_path] = FileNode(file_path=file_path, language=language, dependencies=imported_modules) self.file_nodes[file_path] = FileNode(
file_path=file_path, language=language, dependencies=imported_modules
)
return deps return deps
@@ -594,7 +598,9 @@ class DependencyAnalyzer:
if target and target in self.file_nodes: if target and target in self.file_nodes:
# Add edge from source to dependency # Add edge from source to dependency
self.graph.add_edge(file_path, target, import_type=dep.import_type, line_number=dep.line_number) self.graph.add_edge(
file_path, target, import_type=dep.import_type, line_number=dep.line_number
)
# Update imported_by lists # Update imported_by lists
if target in self.file_nodes: if target in self.file_nodes:
@@ -602,7 +608,9 @@ class DependencyAnalyzer:
return self.graph return self.graph
def _resolve_import(self, source_file: str, imported_module: str, is_relative: bool) -> str | None: def _resolve_import(
self, source_file: str, imported_module: str, is_relative: bool
) -> str | None:
""" """
Resolve import statement to actual file path. Resolve import statement to actual file path.
@@ -736,10 +744,14 @@ class DependencyAnalyzer:
"circular_dependencies": len(self.detect_cycles()), "circular_dependencies": len(self.detect_cycles()),
"strongly_connected_components": len(self.get_strongly_connected_components()), "strongly_connected_components": len(self.get_strongly_connected_components()),
"avg_dependencies_per_file": ( "avg_dependencies_per_file": (
self.graph.number_of_edges() / self.graph.number_of_nodes() if self.graph.number_of_nodes() > 0 else 0 self.graph.number_of_edges() / self.graph.number_of_nodes()
if self.graph.number_of_nodes() > 0
else 0
), ),
"files_with_no_dependencies": len( "files_with_no_dependencies": len(
[node for node in self.graph.nodes() if self.graph.out_degree(node) == 0] [node for node in self.graph.nodes() if self.graph.out_degree(node) == 0]
), ),
"files_not_imported": len([node for node in self.graph.nodes() if self.graph.in_degree(node) == 0]), "files_not_imported": len(
[node for node in self.graph.nodes() if self.graph.in_degree(node) == 0]
),
} }

View File

@@ -65,7 +65,9 @@ def setup_logging(verbose: bool = False, quiet: bool = False) -> None:
logging.basicConfig(level=level, format="%(message)s", force=True) logging.basicConfig(level=level, format="%(message)s", force=True)
def infer_description_from_docs(base_url: str, first_page_content: str | None = None, name: str = "") -> str: def infer_description_from_docs(
base_url: str, first_page_content: str | None = None, name: str = ""
) -> str:
""" """
Infer skill description from documentation metadata or first page content. Infer skill description from documentation metadata or first page content.
@@ -109,7 +111,13 @@ def infer_description_from_docs(base_url: str, first_page_content: str | None =
# Strategy 3: Extract first meaningful paragraph from main content # Strategy 3: Extract first meaningful paragraph from main content
# Look for common documentation main content areas # Look for common documentation main content areas
main_content = None main_content = None
for selector in ["article", "main", 'div[role="main"]', "div.content", "div.doc-content"]: for selector in [
"article",
"main",
'div[role="main"]',
"div.content",
"div.doc-content",
]:
main_content = soup.select_one(selector) main_content = soup.select_one(selector)
if main_content: if main_content:
break break
@@ -120,7 +128,8 @@ def infer_description_from_docs(base_url: str, first_page_content: str | None =
text = p.get_text().strip() text = p.get_text().strip()
# Skip empty, very short, or navigation-like paragraphs # Skip empty, very short, or navigation-like paragraphs
if len(text) > 30 and not any( if len(text) > 30 and not any(
skip in text.lower() for skip in ["table of contents", "on this page", "navigation"] skip in text.lower()
for skip in ["table of contents", "on this page", "navigation"]
): ):
# Clean and format # Clean and format
if len(text) > 150: if len(text) > 150:
@@ -160,7 +169,8 @@ class DocToSkillConverter:
skip_llms_txt_value = config.get("skip_llms_txt", False) skip_llms_txt_value = config.get("skip_llms_txt", False)
if not isinstance(skip_llms_txt_value, bool): if not isinstance(skip_llms_txt_value, bool):
logger.warning( logger.warning(
"Invalid value for 'skip_llms_txt': %r (expected bool). Defaulting to False.", skip_llms_txt_value "Invalid value for 'skip_llms_txt': %r (expected bool). Defaulting to False.",
skip_llms_txt_value,
) )
self.skip_llms_txt = False self.skip_llms_txt = False
else: else:
@@ -381,7 +391,15 @@ class DocToSkillConverter:
if content.strip().startswith("<!DOCTYPE") or content.strip().startswith("<html"): if content.strip().startswith("<!DOCTYPE") or content.strip().startswith("<html"):
return self._extract_html_as_markdown(content, url) return self._extract_html_as_markdown(content, url)
page = {"url": url, "title": "", "content": "", "headings": [], "code_samples": [], "patterns": [], "links": []} page = {
"url": url,
"title": "",
"content": "",
"headings": [],
"code_samples": [],
"patterns": [],
"links": [],
}
lines = content.split("\n") lines = content.split("\n")
@@ -397,7 +415,9 @@ class DocToSkillConverter:
if match: if match:
level = len(match.group(1)) level = len(match.group(1))
text = match.group(2).strip() text = match.group(2).strip()
page["headings"].append({"level": f"h{level}", "text": text, "id": text.lower().replace(" ", "-")}) page["headings"].append(
{"level": f"h{level}", "text": text, "id": text.lower().replace(" ", "-")}
)
# Extract code blocks with language # Extract code blocks with language
code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL) code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
@@ -464,7 +484,15 @@ class DocToSkillConverter:
Falls back to <body> if no semantic content container found. Falls back to <body> if no semantic content container found.
Language detection uses detect_language() method. Language detection uses detect_language() method.
""" """
page = {"url": url, "title": "", "content": "", "headings": [], "code_samples": [], "patterns": [], "links": []} page = {
"url": url,
"title": "",
"content": "",
"headings": [],
"code_samples": [],
"patterns": [],
"links": [],
}
soup = BeautifulSoup(html_content, "html.parser") soup = BeautifulSoup(html_content, "html.parser")
@@ -515,7 +543,9 @@ class DocToSkillConverter:
return lang # Return string for backward compatibility return lang # Return string for backward compatibility
def extract_patterns(self, main: Any, code_samples: list[dict[str, Any]]) -> list[dict[str, str]]: def extract_patterns(
self, main: Any, code_samples: list[dict[str, Any]]
) -> list[dict[str, str]]:
"""Extract common coding patterns (NEW FEATURE)""" """Extract common coding patterns (NEW FEATURE)"""
patterns = [] patterns = []
@@ -527,7 +557,10 @@ class DocToSkillConverter:
next_code = elem.find_next(["pre", "code"]) next_code = elem.find_next(["pre", "code"])
if next_code: if next_code:
patterns.append( patterns.append(
{"description": self.clean_text(elem.get_text()), "code": next_code.get_text().strip()} {
"description": self.clean_text(elem.get_text()),
"code": next_code.get_text().strip(),
}
) )
return patterns[:5] # Limit to 5 most relevant patterns return patterns[:5] # Limit to 5 most relevant patterns
@@ -615,7 +648,9 @@ class DocToSkillConverter:
logger.error(" ✗ Error scraping page: %s: %s", type(e).__name__, e) logger.error(" ✗ Error scraping page: %s: %s", type(e).__name__, e)
logger.error(" URL: %s", url) logger.error(" URL: %s", url)
async def scrape_page_async(self, url: str, semaphore: asyncio.Semaphore, client: httpx.AsyncClient) -> None: async def scrape_page_async(
self, url: str, semaphore: asyncio.Semaphore, client: httpx.AsyncClient
) -> None:
"""Scrape a single page asynchronously. """Scrape a single page asynchronously.
Args: Args:
@@ -682,7 +717,9 @@ class DocToSkillConverter:
md_url = f"{url}/index.html.md" md_url = f"{url}/index.html.md"
md_urls.append(md_url) md_urls.append(md_url)
logger.info(" ✓ Converted %d URLs to .md format (will validate during crawl)", len(md_urls)) logger.info(
" ✓ Converted %d URLs to .md format (will validate during crawl)", len(md_urls)
)
return md_urls return md_urls
# ORIGINAL _convert_to_md_urls (with HEAD request validation): # ORIGINAL _convert_to_md_urls (with HEAD request validation):
@@ -744,7 +781,9 @@ class DocToSkillConverter:
variants = detector.detect_all() variants = detector.detect_all()
if variants: if variants:
logger.info("\n🔍 Found %d total variant(s), downloading remaining...", len(variants)) logger.info(
"\n🔍 Found %d total variant(s), downloading remaining...", len(variants)
)
for variant_info in variants: for variant_info in variants:
url = variant_info["url"] url = variant_info["url"]
variant = variant_info["variant"] variant = variant_info["variant"]
@@ -759,7 +798,9 @@ class DocToSkillConverter:
if extra_content: if extra_content:
extra_filename = extra_downloader.get_proper_filename() extra_filename = extra_downloader.get_proper_filename()
extra_filepath = os.path.join(self.skill_dir, "references", extra_filename) extra_filepath = os.path.join(
self.skill_dir, "references", extra_filename
)
with open(extra_filepath, "w", encoding="utf-8") as f: with open(extra_filepath, "w", encoding="utf-8") as f:
f.write(extra_content) f.write(extra_content)
logger.info("%s (%d chars)", extra_filename, len(extra_content)) logger.info("%s (%d chars)", extra_filename, len(extra_content))
@@ -783,7 +824,9 @@ class DocToSkillConverter:
if self.is_valid_url(url) and url not in self.visited_urls: if self.is_valid_url(url) and url not in self.visited_urls:
self.pending_urls.append(url) self.pending_urls.append(url)
logger.info(" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls)) logger.info(
" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls)
)
# Return False to trigger HTML scraping with the populated pending_urls # Return False to trigger HTML scraping with the populated pending_urls
self.llms_txt_detected = True self.llms_txt_detected = True
@@ -824,7 +867,11 @@ class DocToSkillConverter:
if content: if content:
filename = downloader.get_proper_filename() filename = downloader.get_proper_filename()
downloaded[variant] = {"content": content, "filename": filename, "size": len(content)} downloaded[variant] = {
"content": content,
"filename": filename,
"size": len(content),
}
logger.info("%s (%d chars)", filename, len(content)) logger.info("%s (%d chars)", filename, len(content))
if not downloaded: if not downloaded:
@@ -902,7 +949,9 @@ class DocToSkillConverter:
if not self.dry_run and not self.skip_llms_txt: if not self.dry_run and not self.skip_llms_txt:
llms_result = self._try_llms_txt() llms_result = self._try_llms_txt()
if llms_result: if llms_result:
logger.info("\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant) logger.info(
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
)
self.save_summary() self.save_summary()
return return
@@ -953,7 +1002,9 @@ class DocToSkillConverter:
response = requests.get(url, headers=headers, timeout=10) response = requests.get(url, headers=headers, timeout=10)
soup = BeautifulSoup(response.content, "html.parser") soup = BeautifulSoup(response.content, "html.parser")
main_selector = self.config.get("selectors", {}).get("main_content", 'div[role="main"]') main_selector = self.config.get("selectors", {}).get(
"main_content", 'div[role="main"]'
)
main = soup.select_one(main_selector) main = soup.select_one(main_selector)
if main: if main:
@@ -968,7 +1019,10 @@ class DocToSkillConverter:
self.scrape_page(url) self.scrape_page(url)
self.pages_scraped += 1 self.pages_scraped += 1
if self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0: if (
self.checkpoint_enabled
and self.pages_scraped % self.checkpoint_interval == 0
):
self.save_checkpoint() self.save_checkpoint()
if len(self.visited_urls) % 10 == 0: if len(self.visited_urls) % 10 == 0:
@@ -1019,7 +1073,10 @@ class DocToSkillConverter:
with self.lock: with self.lock:
self.pages_scraped += 1 self.pages_scraped += 1
if self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0: if (
self.checkpoint_enabled
and self.pages_scraped % self.checkpoint_interval == 0
):
self.save_checkpoint() self.save_checkpoint()
if self.pages_scraped % 10 == 0: if self.pages_scraped % 10 == 0:
@@ -1062,7 +1119,9 @@ class DocToSkillConverter:
if not self.dry_run and not self.skip_llms_txt: if not self.dry_run and not self.skip_llms_txt:
llms_result = self._try_llms_txt() llms_result = self._try_llms_txt()
if llms_result: if llms_result:
logger.info("\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant) logger.info(
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
)
self.save_summary() self.save_summary()
return return
@@ -1097,7 +1156,9 @@ class DocToSkillConverter:
semaphore = asyncio.Semaphore(self.workers) semaphore = asyncio.Semaphore(self.workers)
# Create shared HTTP client with connection pooling # Create shared HTTP client with connection pooling
async with httpx.AsyncClient(timeout=30.0, limits=httpx.Limits(max_connections=self.workers * 2)) as client: async with httpx.AsyncClient(
timeout=30.0, limits=httpx.Limits(max_connections=self.workers * 2)
) as client:
tasks = [] tasks = []
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit): while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
@@ -1120,7 +1181,9 @@ class DocToSkillConverter:
if self.dry_run: if self.dry_run:
logger.info(" [Preview] %s", url) logger.info(" [Preview] %s", url)
else: else:
task = asyncio.create_task(self.scrape_page_async(url, semaphore, client)) task = asyncio.create_task(
self.scrape_page_async(url, semaphore, client)
)
tasks.append(task) tasks.append(task)
# Wait for batch to complete before continuing # Wait for batch to complete before continuing
@@ -1145,7 +1208,9 @@ class DocToSkillConverter:
if self.dry_run: if self.dry_run:
logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls)) logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
if len(self.visited_urls) >= preview_limit: if len(self.visited_urls) >= preview_limit:
logger.info(" (showing first %d, actual scraping may find more)", int(preview_limit)) logger.info(
" (showing first %d, actual scraping may find more)", int(preview_limit)
)
logger.info("\n💡 To actually scrape, run without --dry-run") logger.info("\n💡 To actually scrape, run without --dry-run")
else: else:
logger.info("\n✅ Scraped %d pages (async mode)", len(self.visited_urls)) logger.info("\n✅ Scraped %d pages (async mode)", len(self.visited_urls))
@@ -1178,8 +1243,12 @@ class DocToSkillConverter:
with open(json_file, encoding="utf-8") as f: with open(json_file, encoding="utf-8") as f:
pages.append(json.load(f)) pages.append(json.load(f))
except Exception as e: except Exception as e:
logger.error("⚠️ Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e) logger.error(
logger.error(" Suggestion: File may be corrupted, consider re-scraping with --fresh") "⚠️ Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e
)
logger.error(
" Suggestion: File may be corrupted, consider re-scraping with --fresh"
)
return pages return pages
@@ -1197,7 +1266,9 @@ class DocToSkillConverter:
for page in pages: for page in pages:
url = page["url"].lower() url = page["url"].lower()
title = page["title"].lower() title = page["title"].lower()
content = page.get("content", "").lower()[:CONTENT_PREVIEW_LENGTH] # Check first N chars for categorization content = page.get("content", "").lower()[
:CONTENT_PREVIEW_LENGTH
] # Check first N chars for categorization
categorized = False categorized = False
@@ -1232,7 +1303,9 @@ class DocToSkillConverter:
for page in pages: for page in pages:
path = urlparse(page["url"]).path path = urlparse(page["url"]).path
segments = [s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]] segments = [
s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]
]
for seg in segments: for seg in segments:
url_segments[seg] += 1 url_segments[seg] += 1
@@ -1246,10 +1319,14 @@ class DocToSkillConverter:
categories[seg] = [seg] categories[seg] = [seg]
# Add common defaults # Add common defaults
if "tutorial" not in categories and any("tutorial" in url for url in [p["url"] for p in pages]): if "tutorial" not in categories and any(
"tutorial" in url for url in [p["url"] for p in pages]
):
categories["tutorials"] = ["tutorial", "guide", "getting-started"] categories["tutorials"] = ["tutorial", "guide", "getting-started"]
if "api" not in categories and any("api" in url or "reference" in url for url in [p["url"] for p in pages]): if "api" not in categories and any(
"api" in url or "reference" in url for url in [p["url"] for p in pages]
):
categories["api"] = ["api", "reference", "class"] categories["api"] = ["api", "reference", "class"]
return categories return categories
@@ -1551,12 +1628,16 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
# Validate name (alphanumeric, hyphens, underscores only) # Validate name (alphanumeric, hyphens, underscores only)
if "name" in config: if "name" in config:
if not re.match(r"^[a-zA-Z0-9_-]+$", config["name"]): if not re.match(r"^[a-zA-Z0-9_-]+$", config["name"]):
errors.append(f"Invalid name: '{config['name']}' (use only letters, numbers, hyphens, underscores)") errors.append(
f"Invalid name: '{config['name']}' (use only letters, numbers, hyphens, underscores)"
)
# Validate base_url # Validate base_url
if "base_url" in config: if "base_url" in config:
if not config["base_url"].startswith(("http://", "https://")): if not config["base_url"].startswith(("http://", "https://")):
errors.append(f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)") errors.append(
f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)"
)
# Validate selectors structure # Validate selectors structure
if "selectors" in config: if "selectors" in config:
@@ -1596,7 +1677,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
if rate < 0: if rate < 0:
errors.append(f"'rate_limit' must be non-negative (got {rate})") errors.append(f"'rate_limit' must be non-negative (got {rate})")
elif rate > 10: elif rate > 10:
warnings.append(f"'rate_limit' is very high ({rate}s) - this may slow down scraping significantly") warnings.append(
f"'rate_limit' is very high ({rate}s) - this may slow down scraping significantly"
)
except (ValueError, TypeError): except (ValueError, TypeError):
errors.append(f"'rate_limit' must be a number (got {config['rate_limit']})") errors.append(f"'rate_limit' must be a number (got {config['rate_limit']})")
@@ -1606,19 +1689,29 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
# Allow None for unlimited # Allow None for unlimited
if max_p_value is None: if max_p_value is None:
warnings.append("'max_pages' is None (unlimited) - this will scrape ALL pages. Use with caution!") warnings.append(
"'max_pages' is None (unlimited) - this will scrape ALL pages. Use with caution!"
)
else: else:
try: try:
max_p = int(max_p_value) max_p = int(max_p_value)
# Allow -1 for unlimited # Allow -1 for unlimited
if max_p == -1: if max_p == -1:
warnings.append("'max_pages' is -1 (unlimited) - this will scrape ALL pages. Use with caution!") warnings.append(
"'max_pages' is -1 (unlimited) - this will scrape ALL pages. Use with caution!"
)
elif max_p < 1: elif max_p < 1:
errors.append(f"'max_pages' must be at least 1 or -1 for unlimited (got {max_p})") errors.append(
f"'max_pages' must be at least 1 or -1 for unlimited (got {max_p})"
)
elif max_p > MAX_PAGES_WARNING_THRESHOLD: elif max_p > MAX_PAGES_WARNING_THRESHOLD:
warnings.append(f"'max_pages' is very high ({max_p}) - scraping may take a very long time") warnings.append(
f"'max_pages' is very high ({max_p}) - scraping may take a very long time"
)
except (ValueError, TypeError): except (ValueError, TypeError):
errors.append(f"'max_pages' must be an integer, -1, or null (got {config['max_pages']})") errors.append(
f"'max_pages' must be an integer, -1, or null (got {config['max_pages']})"
)
# Validate start_urls if present # Validate start_urls if present
if "start_urls" in config: if "start_urls" in config:
@@ -1627,7 +1720,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
else: else:
for url in config["start_urls"]: for url in config["start_urls"]:
if not url.startswith(("http://", "https://")): if not url.startswith(("http://", "https://")):
errors.append(f"Invalid start_url: '{url}' (must start with http:// or https://)") errors.append(
f"Invalid start_url: '{url}' (must start with http:// or https://)"
)
return errors, warnings return errors, warnings
@@ -1716,7 +1811,9 @@ def interactive_config() -> dict[str, Any]:
# Selectors # Selectors
logger.info("\nCSS Selectors (press Enter for defaults):") logger.info("\nCSS Selectors (press Enter for defaults):")
selectors = {} selectors = {}
selectors["main_content"] = input(" Main content [div[role='main']]: ").strip() or "div[role='main']" selectors["main_content"] = (
input(" Main content [div[role='main']]: ").strip() or "div[role='main']"
)
selectors["title"] = input(" Title [title]: ").strip() or "title" selectors["title"] = input(" Title [title]: ").strip() or "title"
selectors["code_blocks"] = input(" Code blocks [pre code]: ").strip() or "pre code" selectors["code_blocks"] = input(" Code blocks [pre code]: ").strip() or "pre code"
config["selectors"] = selectors config["selectors"] = selectors
@@ -1782,15 +1879,27 @@ def setup_argument_parser() -> argparse.ArgumentParser:
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
) )
parser.add_argument("--interactive", "-i", action="store_true", help="Interactive configuration mode") parser.add_argument(
parser.add_argument("--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)") "--interactive", "-i", action="store_true", help="Interactive configuration mode"
)
parser.add_argument(
"--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)"
)
parser.add_argument("--name", type=str, help="Skill name") parser.add_argument("--name", type=str, help="Skill name")
parser.add_argument("--url", type=str, help="Base documentation URL") parser.add_argument("--url", type=str, help="Base documentation URL")
parser.add_argument("--description", "-d", type=str, help="Skill description") parser.add_argument("--description", "-d", type=str, help="Skill description")
parser.add_argument("--skip-scrape", action="store_true", help="Skip scraping, use existing data")
parser.add_argument("--dry-run", action="store_true", help="Preview what will be scraped without actually scraping")
parser.add_argument( parser.add_argument(
"--enhance", action="store_true", help="Enhance SKILL.md using Claude API after building (requires API key)" "--skip-scrape", action="store_true", help="Skip scraping, use existing data"
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Preview what will be scraped without actually scraping",
)
parser.add_argument(
"--enhance",
action="store_true",
help="Enhance SKILL.md using Claude API after building (requires API key)",
) )
parser.add_argument( parser.add_argument(
"--enhance-local", "--enhance-local",
@@ -1802,8 +1911,14 @@ def setup_argument_parser() -> argparse.ArgumentParser:
action="store_true", action="store_true",
help="Open terminal window for enhancement (use with --enhance-local)", help="Open terminal window for enhancement (use with --enhance-local)",
) )
parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)") parser.add_argument(
parser.add_argument("--resume", action="store_true", help="Resume from last checkpoint (for interrupted scrapes)") "--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
)
parser.add_argument(
"--resume",
action="store_true",
help="Resume from last checkpoint (for interrupted scrapes)",
)
parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh") parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh")
parser.add_argument( parser.add_argument(
"--rate-limit", "--rate-limit",
@@ -1826,10 +1941,16 @@ def setup_argument_parser() -> argparse.ArgumentParser:
help="Enable async mode for better parallel performance (2-3x faster than threads)", help="Enable async mode for better parallel performance (2-3x faster than threads)",
) )
parser.add_argument( parser.add_argument(
"--no-rate-limit", action="store_true", help="Disable rate limiting completely (same as --rate-limit 0)" "--no-rate-limit",
action="store_true",
help="Disable rate limiting completely (same as --rate-limit 0)",
)
parser.add_argument(
"--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)"
)
parser.add_argument(
"--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)"
) )
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)")
parser.add_argument("--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)")
return parser return parser
@@ -1866,7 +1987,11 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
"name": args.name, "name": args.name,
"description": args.description or f"Use when working with {args.name}", "description": args.description or f"Use when working with {args.name}",
"base_url": args.url, "base_url": args.url,
"selectors": {"main_content": "div[role='main']", "title": "title", "code_blocks": "pre code"}, "selectors": {
"main_content": "div[role='main']",
"title": "title",
"code_blocks": "pre code",
},
"url_patterns": {"include": [], "exclude": []}, "url_patterns": {"include": [], "exclude": []},
"rate_limit": DEFAULT_RATE_LIMIT, "rate_limit": DEFAULT_RATE_LIMIT,
"max_pages": DEFAULT_MAX_PAGES, "max_pages": DEFAULT_MAX_PAGES,
@@ -1903,12 +2028,16 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
if config.get("workers", 1) > 1: if config.get("workers", 1) > 1:
logger.info("⚡ Async mode enabled (2-3x faster than threads)") logger.info("⚡ Async mode enabled (2-3x faster than threads)")
else: else:
logger.warning("⚠️ Async mode enabled but workers=1. Consider using --workers 4 for better performance") logger.warning(
"⚠️ Async mode enabled but workers=1. Consider using --workers 4 for better performance"
)
return config return config
def execute_scraping_and_building(config: dict[str, Any], args: argparse.Namespace) -> Optional["DocToSkillConverter"]: def execute_scraping_and_building(
config: dict[str, Any], args: argparse.Namespace
) -> Optional["DocToSkillConverter"]:
"""Execute the scraping and skill building process. """Execute the scraping and skill building process.
Handles dry run mode, existing data checks, scraping with checkpoints, Handles dry run mode, existing data checks, scraping with checkpoints,
@@ -1995,7 +2124,10 @@ def execute_scraping_and_building(config: dict[str, Any], args: argparse.Namespa
if converter.checkpoint_enabled: if converter.checkpoint_enabled:
converter.save_checkpoint() converter.save_checkpoint()
logger.info("💾 Progress saved to checkpoint") logger.info("💾 Progress saved to checkpoint")
logger.info(" Resume with: --config %s --resume", args.config if args.config else "config.json") logger.info(
" Resume with: --config %s --resume",
args.config if args.config else "config.json",
)
response = input("Continue with skill building? (y/n): ").strip().lower() response = input("Continue with skill building? (y/n): ").strip().lower()
if response != "y": if response != "y":
return None return None
@@ -2086,7 +2218,9 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non
logger.info(" or re-run with: --enhance-local") logger.info(" or re-run with: --enhance-local")
logger.info(" API-based: skill-seekers-enhance-api output/%s/", config["name"]) logger.info(" API-based: skill-seekers-enhance-api output/%s/", config["name"])
logger.info(" or re-run with: --enhance") logger.info(" or re-run with: --enhance")
logger.info("\n💡 Tip: Use --interactive-enhancement with --enhance-local to open terminal window") logger.info(
"\n💡 Tip: Use --interactive-enhancement with --enhance-local to open terminal window"
)
def main() -> None: def main() -> None:

View File

@@ -41,7 +41,9 @@ class SkillEnhancer:
self.skill_md_path = self.skill_dir / "SKILL.md" self.skill_md_path = self.skill_dir / "SKILL.md"
# Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN # Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_AUTH_TOKEN") self.api_key = (
api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_AUTH_TOKEN")
)
if not self.api_key: if not self.api_key:
raise ValueError( raise ValueError(
"No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN " "No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN "
@@ -174,7 +176,9 @@ This skill combines knowledge from {len(sources_found)} source type(s):
if repo_id: if repo_id:
prompt += f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n" prompt += f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n"
else: else:
prompt += f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n" prompt += (
f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
)
prompt += f"```markdown\n{content}\n```\n" prompt += f"```markdown\n{content}\n```\n"
prompt += """ prompt += """
@@ -295,7 +299,9 @@ Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
# Read reference files # Read reference files
print("📖 Reading reference documentation...") print("📖 Reading reference documentation...")
references = read_reference_files(self.skill_dir, max_chars=API_CONTENT_LIMIT, preview_limit=API_PREVIEW_LIMIT) references = read_reference_files(
self.skill_dir, max_chars=API_CONTENT_LIMIT, preview_limit=API_PREVIEW_LIMIT
)
if not references: if not references:
print("❌ No reference files found to analyze") print("❌ No reference files found to analyze")
@@ -334,7 +340,9 @@ Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
print("\n✅ Enhancement complete!") print("\n✅ Enhancement complete!")
print("\nNext steps:") print("\nNext steps:")
print(f" 1. Review: {self.skill_md_path}") print(f" 1. Review: {self.skill_md_path}")
print(f" 2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}") print(
f" 2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}"
)
print(" 3. Package your skill:") print(" 3. Package your skill:")
print(f" skill-seekers package {self.skill_dir}/") print(f" skill-seekers package {self.skill_dir}/")
@@ -367,15 +375,21 @@ Examples:
""", """,
) )
parser.add_argument("skill_dir", type=str, help="Path to skill directory (e.g., output/steam-inventory/)") parser.add_argument(
parser.add_argument("--api-key", type=str, help="Platform API key (or set environment variable)") "skill_dir", type=str, help="Path to skill directory (e.g., output/steam-inventory/)"
)
parser.add_argument(
"--api-key", type=str, help="Platform API key (or set environment variable)"
)
parser.add_argument( parser.add_argument(
"--target", "--target",
choices=["claude", "gemini", "openai"], choices=["claude", "gemini", "openai"],
default="claude", default="claude",
help="Target LLM platform (default: claude)", help="Target LLM platform (default: claude)",
) )
parser.add_argument("--dry-run", action="store_true", help="Show what would be done without calling API") parser.add_argument(
"--dry-run", action="store_true", help="Show what would be done without calling API"
)
args = parser.parse_args() args = parser.parse_args()
@@ -447,7 +461,9 @@ Examples:
print("\n✅ Enhancement complete!") print("\n✅ Enhancement complete!")
print("\nNext steps:") print("\nNext steps:")
print(f" 1. Review: {Path(skill_dir) / 'SKILL.md'}") print(f" 1. Review: {Path(skill_dir) / 'SKILL.md'}")
print(f" 2. If you don't like it, restore backup: {Path(skill_dir) / 'SKILL.md.backup'}") print(
f" 2. If you don't like it, restore backup: {Path(skill_dir) / 'SKILL.md.backup'}"
)
print(" 3. Package your skill:") print(" 3. Package your skill:")
print(f" skill-seekers package {skill_dir}/ --target {args.target}") print(f" skill-seekers package {skill_dir}/ --target {args.target}")

View File

@@ -216,7 +216,9 @@ class LocalSkillEnhancer:
if use_summarization or total_ref_size > 30000: if use_summarization or total_ref_size > 30000:
if not use_summarization: if not use_summarization:
print(f" ⚠️ Large skill detected ({total_ref_size:,} chars)") print(f" ⚠️ Large skill detected ({total_ref_size:,} chars)")
print(f" 📊 Applying smart summarization (target: {int(summarization_ratio * 100)}% of original)") print(
f" 📊 Applying smart summarization (target: {int(summarization_ratio * 100)}% of original)"
)
print() print()
# Summarize each reference # Summarize each reference
@@ -307,7 +309,9 @@ REFERENCE DOCUMENTATION:
if repo_id: if repo_id:
prompt += f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n" prompt += f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n"
else: else:
prompt += f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n" prompt += (
f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
)
prompt += f"{content}\n" prompt += f"{content}\n"
prompt += f""" prompt += f"""
@@ -528,7 +532,9 @@ After writing, the file SKILL.md should:
return False return False
# Save prompt to temp file # Save prompt to temp file
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f: with tempfile.NamedTemporaryFile(
mode="w", suffix=".txt", delete=False, encoding="utf-8"
) as f:
prompt_file = f.name prompt_file = f.name
f.write(prompt) f.write(prompt)
@@ -605,7 +611,9 @@ rm {prompt_file}
print(f" - Prompt file: {prompt_file}") print(f" - Prompt file: {prompt_file}")
print(f" - Skill directory: {self.skill_dir.absolute()}") print(f" - Skill directory: {self.skill_dir.absolute()}")
print(f" - SKILL.md will be saved to: {self.skill_md_path.absolute()}") print(f" - SKILL.md will be saved to: {self.skill_md_path.absolute()}")
print(f" - Original backed up to: {self.skill_md_path.with_suffix('.md.backup').absolute()}") print(
f" - Original backed up to: {self.skill_md_path.with_suffix('.md.backup').absolute()}"
)
print() print()
print("⏳ Wait for Claude Code to finish in the other terminal...") print("⏳ Wait for Claude Code to finish in the other terminal...")
print(" (Usually takes 30-60 seconds)") print(" (Usually takes 30-60 seconds)")
@@ -782,7 +790,9 @@ rm {prompt_file}
return return
# Save prompt to temp file # Save prompt to temp file
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f: with tempfile.NamedTemporaryFile(
mode="w", suffix=".txt", delete=False, encoding="utf-8"
) as f:
prompt_file = f.name prompt_file = f.name
f.write(prompt) f.write(prompt)
@@ -791,7 +801,9 @@ rm {prompt_file}
# Run enhancement # Run enhancement
if headless: if headless:
# Run headless (subprocess.run - blocking in thread) # Run headless (subprocess.run - blocking in thread)
result = subprocess.run(["claude", prompt_file], capture_output=True, text=True, timeout=timeout) result = subprocess.run(
["claude", prompt_file], capture_output=True, text=True, timeout=timeout
)
# Clean up # Clean up
try: try:
@@ -800,9 +812,13 @@ rm {prompt_file}
pass pass
if result.returncode == 0: if result.returncode == 0:
self.write_status("completed", "Enhancement completed successfully!", progress=1.0) self.write_status(
"completed", "Enhancement completed successfully!", progress=1.0
)
else: else:
self.write_status("failed", error=f"Claude returned error: {result.returncode}") self.write_status(
"failed", error=f"Claude returned error: {result.returncode}"
)
else: else:
# Terminal mode in background doesn't make sense # Terminal mode in background doesn't make sense
self.write_status("failed", error="Terminal mode not supported in background") self.write_status("failed", error="Terminal mode not supported in background")
@@ -951,7 +967,10 @@ except Exception as e:
# Normal mode: Log to file # Normal mode: Log to file
with open(log_file, "w") as log: with open(log_file, "w") as log:
subprocess.Popen( subprocess.Popen(
["nohup", "python3", str(daemon_script_path)], stdout=log, stderr=log, start_new_session=True ["nohup", "python3", str(daemon_script_path)],
stdout=log,
stderr=log,
start_new_session=True,
) )
# Give daemon time to start # Give daemon time to start
@@ -1033,10 +1052,14 @@ Force Mode (Default ON):
) )
parser.add_argument( parser.add_argument(
"--background", action="store_true", help="Run in background and return immediately (non-blocking)" "--background",
action="store_true",
help="Run in background and return immediately (non-blocking)",
) )
parser.add_argument("--daemon", action="store_true", help="Run as persistent daemon process (fully detached)") parser.add_argument(
"--daemon", action="store_true", help="Run as persistent daemon process (fully detached)"
)
parser.add_argument( parser.add_argument(
"--no-force", "--no-force",
@@ -1045,7 +1068,10 @@ Force Mode (Default ON):
) )
parser.add_argument( parser.add_argument(
"--timeout", type=int, default=600, help="Timeout in seconds for headless mode (default: 600 = 10 minutes)" "--timeout",
type=int,
default=600,
help="Timeout in seconds for headless mode (default: 600 = 10 minutes)",
) )
args = parser.parse_args() args = parser.parse_args()
@@ -1053,7 +1079,9 @@ Force Mode (Default ON):
# Validate mutually exclusive options # Validate mutually exclusive options
mode_count = sum([args.interactive_enhancement, args.background, args.daemon]) mode_count = sum([args.interactive_enhancement, args.background, args.daemon])
if mode_count > 1: if mode_count > 1:
print("❌ Error: --interactive-enhancement, --background, and --daemon are mutually exclusive") print(
"❌ Error: --interactive-enhancement, --background, and --daemon are mutually exclusive"
)
print(" Choose only one mode") print(" Choose only one mode")
sys.exit(1) sys.exit(1)
@@ -1061,7 +1089,9 @@ Force Mode (Default ON):
# Force mode is ON by default, use --no-force to disable # Force mode is ON by default, use --no-force to disable
enhancer = LocalSkillEnhancer(args.skill_directory, force=not args.no_force) enhancer = LocalSkillEnhancer(args.skill_directory, force=not args.no_force)
headless = not args.interactive_enhancement # Invert: default is headless headless = not args.interactive_enhancement # Invert: default is headless
success = enhancer.run(headless=headless, timeout=args.timeout, background=args.background, daemon=args.daemon) success = enhancer.run(
headless=headless, timeout=args.timeout, background=args.background, daemon=args.daemon
)
sys.exit(0 if success else 1) sys.exit(0 if success else 1)

View File

@@ -149,12 +149,17 @@ Examples:
parser.add_argument("skill_directory", help="Path to skill directory (e.g., output/react/)") parser.add_argument("skill_directory", help="Path to skill directory (e.g., output/react/)")
parser.add_argument( parser.add_argument(
"--watch", "-w", action="store_true", help="Watch status in real-time (updates every 2 seconds)" "--watch",
"-w",
action="store_true",
help="Watch status in real-time (updates every 2 seconds)",
) )
parser.add_argument("--json", action="store_true", help="Output raw JSON (for scripting)") parser.add_argument("--json", action="store_true", help="Output raw JSON (for scripting)")
parser.add_argument("--interval", type=int, default=2, help="Watch update interval in seconds (default: 2)") parser.add_argument(
"--interval", type=int, default=2, help="Watch update interval in seconds (default: 2)"
)
args = parser.parse_args() args = parser.parse_args()

View File

@@ -17,7 +17,11 @@ from bs4 import BeautifulSoup
# Add parent directory to path for imports when run as script # Add parent directory to path for imports when run as script
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from skill_seekers.cli.constants import DEFAULT_MAX_DISCOVERY, DEFAULT_RATE_LIMIT, DISCOVERY_THRESHOLD from skill_seekers.cli.constants import (
DEFAULT_MAX_DISCOVERY,
DEFAULT_RATE_LIMIT,
DISCOVERY_THRESHOLD,
)
def estimate_pages(config, max_discovery=DEFAULT_MAX_DISCOVERY, timeout=30): def estimate_pages(config, max_discovery=DEFAULT_MAX_DISCOVERY, timeout=30):
@@ -306,7 +310,12 @@ def list_all_configs():
description = description[:57] + "..." description = description[:57] + "..."
by_category[category].append( by_category[category].append(
{"file": config_file.name, "path": str(rel_path), "name": name, "description": description} {
"file": config_file.name,
"path": str(rel_path),
"name": name,
"description": description,
}
) )
except Exception as e: except Exception as e:
# If we can't parse the config, just use the filename # If we can't parse the config, just use the filename
@@ -366,7 +375,11 @@ Examples:
) )
parser.add_argument("config", nargs="?", help="Path to config JSON file") parser.add_argument("config", nargs="?", help="Path to config JSON file")
parser.add_argument("--all", action="store_true", help="List all available configs from api/configs_repo/official/") parser.add_argument(
"--all",
action="store_true",
help="List all available configs from api/configs_repo/official/",
)
parser.add_argument( parser.add_argument(
"--max-discovery", "--max-discovery",
"-m", "-m",
@@ -380,7 +393,13 @@ Examples:
action="store_true", action="store_true",
help="Remove discovery limit - discover all pages (same as --max-discovery -1)", help="Remove discovery limit - discover all pages (same as --max-discovery -1)",
) )
parser.add_argument("--timeout", "-t", type=int, default=30, help="HTTP request timeout in seconds (default: 30)") parser.add_argument(
"--timeout",
"-t",
type=int,
default=30,
help="HTTP request timeout in seconds (default: 30)",
)
args = parser.parse_args() args = parser.parse_args()

View File

@@ -35,7 +35,10 @@ class RouterGenerator:
"""Generates router skills that direct to specialized sub-skills with GitHub integration""" """Generates router skills that direct to specialized sub-skills with GitHub integration"""
def __init__( def __init__(
self, config_paths: list[str], router_name: str = None, github_streams: Optional["ThreeStreamData"] = None self,
config_paths: list[str],
router_name: str = None,
github_streams: Optional["ThreeStreamData"] = None,
): ):
""" """
Initialize router generator with optional GitHub streams. Initialize router generator with optional GitHub streams.
@@ -124,7 +127,10 @@ class RouterGenerator:
label = label_info["label"].lower() label = label_info["label"].lower()
# Check if label relates to any skill keyword # Check if label relates to any skill keyword
if any(keyword.lower() in label or label in keyword.lower() for keyword in skill_keywords): if any(
keyword.lower() in label or label in keyword.lower()
for keyword in skill_keywords
):
# Add twice for 2x weight # Add twice for 2x weight
keywords.append(label) keywords.append(label)
keywords.append(label) keywords.append(label)
@@ -217,9 +223,13 @@ class RouterGenerator:
if unique_topics: if unique_topics:
topics_str = ", ".join(unique_topics) topics_str = ", ".join(unique_topics)
description = f"{self.router_name.title()} framework. Use when working with: {topics_str}" description = (
f"{self.router_name.title()} framework. Use when working with: {topics_str}"
)
else: else:
description = f"Use when working with {self.router_name.title()} development and programming" description = (
f"Use when working with {self.router_name.title()} development and programming"
)
# Truncate to 200 chars for performance (agentskills.io recommendation) # Truncate to 200 chars for performance (agentskills.io recommendation)
if len(description) > 200: if len(description) > 200:
@@ -357,7 +367,9 @@ compatibility: {compatibility}
topic = self._extract_topic_from_skill(first_skill) topic = self._extract_topic_from_skill(first_skill)
keyword = first_keywords[0] if first_keywords else topic keyword = first_keywords[0] if first_keywords else topic
examples.append(f'**Q:** "How do I implement {keyword}?"\n**A:** Activates {first_skill} skill') examples.append(
f'**Q:** "How do I implement {keyword}?"\n**A:** Activates {first_skill} skill'
)
# Example 2: Different skill (second sub-skill if available) # Example 2: Different skill (second sub-skill if available)
if len(skill_names) >= 2: if len(skill_names) >= 2:
@@ -434,7 +446,9 @@ compatibility: {compatibility}
f"**A:** Activates {skill_name} skill" f"**A:** Activates {skill_name} skill"
) )
return "\n\n".join(examples) if examples else self._generate_dynamic_examples(routing_keywords) return (
"\n\n".join(examples) if examples else self._generate_dynamic_examples(routing_keywords)
)
def _convert_issue_to_question(self, issue_title: str) -> str: def _convert_issue_to_question(self, issue_title: str) -> str:
""" """
@@ -492,7 +506,9 @@ compatibility: {compatibility}
patterns = [] patterns = []
# Top 5 closed issues with most engagement (comments indicate usefulness) # Top 5 closed issues with most engagement (comments indicate usefulness)
top_solutions = sorted(known_solutions, key=lambda x: x.get("comments", 0), reverse=True)[:5] top_solutions = sorted(known_solutions, key=lambda x: x.get("comments", 0), reverse=True)[
:5
]
for issue in top_solutions: for issue in top_solutions:
title = issue.get("title", "") title = issue.get("title", "")
@@ -1000,8 +1016,12 @@ GitHub issues related to this topic:
md = "# Common GitHub Issues\n\n" md = "# Common GitHub Issues\n\n"
md += "Top issues reported by the community:\n\n" md += "Top issues reported by the community:\n\n"
common_problems = self.github_issues.get("common_problems", [])[:10] if self.github_issues else [] common_problems = (
known_solutions = self.github_issues.get("known_solutions", [])[:10] if self.github_issues else [] self.github_issues.get("common_problems", [])[:10] if self.github_issues else []
)
known_solutions = (
self.github_issues.get("known_solutions", [])[:10] if self.github_issues else []
)
if common_problems: if common_problems:
md += "## Open Issues (Common Problems)\n\n" md += "## Open Issues (Common Problems)\n\n"

View File

@@ -77,7 +77,11 @@ class GitHubThreeStreamFetcher:
""" """
def __init__( def __init__(
self, repo_url: str, github_token: str | None = None, interactive: bool = True, profile_name: str | None = None self,
repo_url: str,
github_token: str | None = None,
interactive: bool = True,
profile_name: str | None = None,
): ):
""" """
Initialize fetcher. Initialize fetcher.
@@ -412,7 +416,9 @@ class GitHubThreeStreamFetcher:
continue continue
# Skip hidden files (but allow docs in docs/ directories) # Skip hidden files (but allow docs in docs/ directories)
is_in_docs_dir = any(pattern in str(file_path) for pattern in ["docs/", "doc/", "documentation/"]) is_in_docs_dir = any(
pattern in str(file_path) for pattern in ["docs/", "doc/", "documentation/"]
)
if any(part.startswith(".") for part in file_path.parts): if any(part.startswith(".") for part in file_path.parts):
if not is_in_docs_dir: if not is_in_docs_dir:
continue continue
@@ -495,9 +501,15 @@ class GitHubThreeStreamFetcher:
label_counts = Counter(all_labels) label_counts = Counter(all_labels)
return { return {
"common_problems": sorted(common_problems, key=lambda x: x["comments"], reverse=True)[:10], "common_problems": sorted(common_problems, key=lambda x: x["comments"], reverse=True)[
"known_solutions": sorted(known_solutions, key=lambda x: x["comments"], reverse=True)[:10], :10
"top_labels": [{"label": label, "count": count} for label, count in label_counts.most_common(10)], ],
"known_solutions": sorted(known_solutions, key=lambda x: x["comments"], reverse=True)[
:10
],
"top_labels": [
{"label": label, "count": count} for label, count in label_counts.most_common(10)
],
} }
def read_file(self, file_path: Path) -> str | None: def read_file(self, file_path: Path) -> str | None:

View File

@@ -178,7 +178,9 @@ class GitHubScraper:
self.repo_name = config["repo"] self.repo_name = config["repo"]
self.name = config.get("name", self.repo_name.split("/")[-1]) self.name = config.get("name", self.repo_name.split("/")[-1])
# Set initial description (will be improved after README extraction if not in config) # Set initial description (will be improved after README extraction if not in config)
self.description = config.get("description", f"Use when working with {self.repo_name.split('/')[-1]}") self.description = config.get(
"description", f"Use when working with {self.repo_name.split('/')[-1]}"
)
# Local repository path (optional - enables unlimited analysis) # Local repository path (optional - enables unlimited analysis)
self.local_repo_path = local_repo_path or config.get("local_repo_path") self.local_repo_path = local_repo_path or config.get("local_repo_path")
@@ -192,14 +194,18 @@ class GitHubScraper:
# Option 1: Replace mode - Use only specified exclusions # Option 1: Replace mode - Use only specified exclusions
if "exclude_dirs" in config: if "exclude_dirs" in config:
self.excluded_dirs = set(config["exclude_dirs"]) self.excluded_dirs = set(config["exclude_dirs"])
logger.warning(f"Using custom directory exclusions ({len(self.excluded_dirs)} dirs) - defaults overridden") logger.warning(
f"Using custom directory exclusions ({len(self.excluded_dirs)} dirs) - defaults overridden"
)
logger.debug(f"Custom exclusions: {sorted(self.excluded_dirs)}") logger.debug(f"Custom exclusions: {sorted(self.excluded_dirs)}")
# Option 2: Extend mode - Add to default exclusions # Option 2: Extend mode - Add to default exclusions
elif "exclude_dirs_additional" in config: elif "exclude_dirs_additional" in config:
additional = set(config["exclude_dirs_additional"]) additional = set(config["exclude_dirs_additional"])
self.excluded_dirs = self.excluded_dirs.union(additional) self.excluded_dirs = self.excluded_dirs.union(additional)
logger.info(f"Added {len(additional)} custom directory exclusions (total: {len(self.excluded_dirs)})") logger.info(
f"Added {len(additional)} custom directory exclusions (total: {len(self.excluded_dirs)})"
)
logger.debug(f"Additional exclusions: {sorted(additional)}") logger.debug(f"Additional exclusions: {sorted(additional)}")
# Load .gitignore for additional exclusions (C2.1) # Load .gitignore for additional exclusions (C2.1)
@@ -218,7 +224,9 @@ class GitHubScraper:
self.include_changelog = config.get("include_changelog", True) self.include_changelog = config.get("include_changelog", True)
self.include_releases = config.get("include_releases", True) self.include_releases = config.get("include_releases", True)
self.include_code = config.get("include_code", False) self.include_code = config.get("include_code", False)
self.code_analysis_depth = config.get("code_analysis_depth", "surface") # 'surface', 'deep', 'full' self.code_analysis_depth = config.get(
"code_analysis_depth", "surface"
) # 'surface', 'deep', 'full'
self.file_patterns = config.get("file_patterns", []) self.file_patterns = config.get("file_patterns", [])
# Initialize code analyzer if deep analysis requested # Initialize code analyzer if deep analysis requested
@@ -261,7 +269,9 @@ class GitHubScraper:
logger.warning("Using GitHub token from config file (less secure)") logger.warning("Using GitHub token from config file (less secure)")
return token return token
logger.warning("No GitHub token provided - using unauthenticated access (lower rate limits)") logger.warning(
"No GitHub token provided - using unauthenticated access (lower rate limits)"
)
return None return None
def scrape(self) -> dict[str, Any]: def scrape(self) -> dict[str, Any]:
@@ -334,7 +344,9 @@ class GitHubScraper:
"topics": self.repo.get_topics(), "topics": self.repo.get_topics(),
} }
logger.info(f"Repository fetched: {self.repo.full_name} ({self.repo.stargazers_count} stars)") logger.info(
f"Repository fetched: {self.repo.full_name} ({self.repo.stargazers_count} stars)"
)
except GithubException as e: except GithubException as e:
if e.status == 404: if e.status == 404:
@@ -378,7 +390,9 @@ class GitHubScraper:
file_size = getattr(content, "size", 0) file_size = getattr(content, "size", 0)
if download_url: if download_url:
logger.info(f"File {file_path} is large ({file_size:,} bytes), downloading via URL...") logger.info(
f"File {file_path} is large ({file_size:,} bytes), downloading via URL..."
)
try: try:
import requests import requests
@@ -389,7 +403,9 @@ class GitHubScraper:
logger.warning(f"Failed to download {file_path} from {download_url}: {e}") logger.warning(f"Failed to download {file_path} from {download_url}: {e}")
return None return None
else: else:
logger.warning(f"File {file_path} has no download URL (encoding={content.encoding})") logger.warning(
f"File {file_path} has no download URL (encoding={content.encoding})"
)
return None return None
# Handle regular files - decode content # Handle regular files - decode content
@@ -419,7 +435,14 @@ class GitHubScraper:
logger.info("Extracting README...") logger.info("Extracting README...")
# Try common README locations # Try common README locations
readme_files = ["README.md", "README.rst", "README.txt", "README", "docs/README.md", ".github/README.md"] readme_files = [
"README.md",
"README.rst",
"README.txt",
"README",
"docs/README.md",
".github/README.md",
]
for readme_path in readme_files: for readme_path in readme_files:
readme_content = self._get_file_content(readme_path) readme_content = self._get_file_content(readme_path)
@@ -429,7 +452,9 @@ class GitHubScraper:
# Update description if not explicitly set in config # Update description if not explicitly set in config
if "description" not in self.config: if "description" not in self.config:
smart_description = extract_description_from_readme(self.extracted_data["readme"], self.repo_name) smart_description = extract_description_from_readme(
self.extracted_data["readme"], self.repo_name
)
self.description = smart_description self.description = smart_description
logger.debug(f"Generated description: {self.description}") logger.debug(f"Generated description: {self.description}")
@@ -465,7 +490,9 @@ class GitHubScraper:
self.extracted_data["languages"] = { self.extracted_data["languages"] = {
lang: { lang: {
"bytes": bytes_count, "bytes": bytes_count,
"percentage": round((bytes_count / total_bytes) * 100, 2) if total_bytes > 0 else 0, "percentage": round((bytes_count / total_bytes) * 100, 2)
if total_bytes > 0
else 0,
} }
for lang, bytes_count in languages.items() for lang, bytes_count in languages.items()
} }
@@ -502,7 +529,9 @@ class GitHubScraper:
# For directories, we need to check both with and without trailing slash # For directories, we need to check both with and without trailing slash
# as .gitignore patterns can match either way # as .gitignore patterns can match either way
dir_path_with_slash = dir_path if dir_path.endswith("/") else dir_path + "/" dir_path_with_slash = dir_path if dir_path.endswith("/") else dir_path + "/"
if self.gitignore_spec.match_file(dir_path) or self.gitignore_spec.match_file(dir_path_with_slash): if self.gitignore_spec.match_file(dir_path) or self.gitignore_spec.match_file(
dir_path_with_slash
):
logger.debug(f"Directory excluded by .gitignore: {dir_path}") logger.debug(f"Directory excluded by .gitignore: {dir_path}")
return True return True
@@ -555,7 +584,9 @@ class GitHubScraper:
return return
# Log exclusions for debugging # Log exclusions for debugging
logger.info(f"Directory exclusions ({len(self.excluded_dirs)} total): {sorted(list(self.excluded_dirs)[:10])}") logger.info(
f"Directory exclusions ({len(self.excluded_dirs)} total): {sorted(list(self.excluded_dirs)[:10])}"
)
file_tree = [] file_tree = []
excluded_count = 0 excluded_count = 0
@@ -594,7 +625,9 @@ class GitHubScraper:
file_tree.append({"path": file_path, "type": "file", "size": file_size}) file_tree.append({"path": file_path, "type": "file", "size": file_size})
self.extracted_data["file_tree"] = file_tree self.extracted_data["file_tree"] = file_tree
logger.info(f"File tree built (local mode): {len(file_tree)} items ({excluded_count} directories excluded)") logger.info(
f"File tree built (local mode): {len(file_tree)} items ({excluded_count} directories excluded)"
)
def _extract_file_tree_github(self): def _extract_file_tree_github(self):
"""Extract file tree from GitHub API (rate-limited).""" """Extract file tree from GitHub API (rate-limited)."""
@@ -695,10 +728,16 @@ class GitHubScraper:
file_content = self.repo.get_contents(file_path) file_content = self.repo.get_contents(file_path)
content = file_content.decoded_content.decode("utf-8") content = file_content.decoded_content.decode("utf-8")
analysis_result = self.code_analyzer.analyze_file(file_path, content, primary_language) analysis_result = self.code_analyzer.analyze_file(
file_path, content, primary_language
)
if analysis_result and (analysis_result.get("classes") or analysis_result.get("functions")): if analysis_result and (
analyzed_files.append({"file": file_path, "language": primary_language, **analysis_result}) analysis_result.get("classes") or analysis_result.get("functions")
):
analyzed_files.append(
{"file": file_path, "language": primary_language, **analysis_result}
)
logger.debug( logger.debug(
f"Analyzed {file_path}: " f"Analyzed {file_path}: "
@@ -805,7 +844,9 @@ class GitHubScraper:
"draft": release.draft, "draft": release.draft,
"prerelease": release.prerelease, "prerelease": release.prerelease,
"created_at": release.created_at.isoformat() if release.created_at else None, "created_at": release.created_at.isoformat() if release.created_at else None,
"published_at": release.published_at.isoformat() if release.published_at else None, "published_at": release.published_at.isoformat()
if release.published_at
else None,
"url": release.html_url, "url": release.html_url,
"tarball_url": release.tarball_url, "tarball_url": release.tarball_url,
"zipball_url": release.zipball_url, "zipball_url": release.zipball_url,
@@ -973,13 +1014,21 @@ Use this skill when you need to:
if has_c3_data: if has_c3_data:
skill_content += "\n### Codebase Analysis References\n\n" skill_content += "\n### Codebase Analysis References\n\n"
if c3_data.get("patterns"): if c3_data.get("patterns"):
skill_content += "- `references/codebase_analysis/patterns/` - Design patterns detected\n" skill_content += (
"- `references/codebase_analysis/patterns/` - Design patterns detected\n"
)
if c3_data.get("test_examples"): if c3_data.get("test_examples"):
skill_content += "- `references/codebase_analysis/examples/` - Test examples extracted\n" skill_content += (
"- `references/codebase_analysis/examples/` - Test examples extracted\n"
)
if c3_data.get("config_patterns"): if c3_data.get("config_patterns"):
skill_content += "- `references/codebase_analysis/configuration/` - Configuration analysis\n" skill_content += (
"- `references/codebase_analysis/configuration/` - Configuration analysis\n"
)
if c3_data.get("architecture"): if c3_data.get("architecture"):
skill_content += "- `references/codebase_analysis/ARCHITECTURE.md` - Architecture overview\n" skill_content += (
"- `references/codebase_analysis/ARCHITECTURE.md` - Architecture overview\n"
)
# Usage # Usage
skill_content += "\n## 💻 Usage\n\n" skill_content += "\n## 💻 Usage\n\n"
@@ -1020,7 +1069,9 @@ Use this skill when you need to:
lines = [] lines = []
for release in releases[:3]: for release in releases[:3]:
lines.append(f"- **{release['tag_name']}** ({release['published_at'][:10]}): {release['name']}") lines.append(
f"- **{release['tag_name']}** ({release['published_at'][:10]}): {release['name']}"
)
return "\n".join(lines) return "\n".join(lines)
@@ -1132,7 +1183,9 @@ Use this skill when you need to:
if patterns: if patterns:
content += "**Architectural Patterns:**\n" content += "**Architectural Patterns:**\n"
for pattern in patterns[:5]: for pattern in patterns[:5]:
content += f"- {pattern.get('name', 'Unknown')}: {pattern.get('description', 'N/A')}\n" content += (
f"- {pattern.get('name', 'Unknown')}: {pattern.get('description', 'N/A')}\n"
)
content += "\n" content += "\n"
# Dependencies (C2.6) # Dependencies (C2.6)
@@ -1233,7 +1286,9 @@ Use this skill when you need to:
"""Generate releases.md reference file.""" """Generate releases.md reference file."""
releases = self.data["releases"] releases = self.data["releases"]
content = f"# Releases\n\nVersion history for this repository ({len(releases)} releases).\n\n" content = (
f"# Releases\n\nVersion history for this repository ({len(releases)} releases).\n\n"
)
for release in releases: for release in releases:
content += f"## {release['tag_name']}: {release['name']}\n" content += f"## {release['tag_name']}: {release['name']}\n"
@@ -1294,14 +1349,22 @@ Examples:
parser.add_argument("--max-issues", type=int, default=100, help="Max issues to fetch") parser.add_argument("--max-issues", type=int, default=100, help="Max issues to fetch")
parser.add_argument("--scrape-only", action="store_true", help="Only scrape, don't build skill") parser.add_argument("--scrape-only", action="store_true", help="Only scrape, don't build skill")
parser.add_argument( parser.add_argument(
"--enhance", action="store_true", help="Enhance SKILL.md using Claude API after building (requires API key)" "--enhance",
action="store_true",
help="Enhance SKILL.md using Claude API after building (requires API key)",
) )
parser.add_argument( parser.add_argument(
"--enhance-local", action="store_true", help="Enhance SKILL.md using Claude Code (no API key needed)" "--enhance-local",
action="store_true",
help="Enhance SKILL.md using Claude Code (no API key needed)",
) )
parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)")
parser.add_argument( parser.add_argument(
"--non-interactive", action="store_true", help="Non-interactive mode for CI/CD (fail fast on rate limits)" "--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
)
parser.add_argument(
"--non-interactive",
action="store_true",
help="Non-interactive mode for CI/CD (fail fast on rate limits)",
) )
parser.add_argument("--profile", type=str, help="GitHub profile name to use from config") parser.add_argument("--profile", type=str, help="GitHub profile name to use from config")
@@ -1368,7 +1431,9 @@ Examples:
api_key = args.api_key or os.environ.get("ANTHROPIC_API_KEY") api_key = args.api_key or os.environ.get("ANTHROPIC_API_KEY")
if not api_key: if not api_key:
logger.error("❌ ANTHROPIC_API_KEY not set. Use --api-key or set environment variable.") logger.error(
"❌ ANTHROPIC_API_KEY not set. Use --api-key or set environment variable."
)
logger.info("💡 Tip: Use --enhance-local instead (no API key needed)") logger.info("💡 Tip: Use --enhance-local instead (no API key needed)")
else: else:
# Import and run API enhancement # Import and run API enhancement
@@ -1378,7 +1443,9 @@ Examples:
enhance_skill_md(skill_dir, api_key) enhance_skill_md(skill_dir, api_key)
logger.info("✅ API enhancement complete!") logger.info("✅ API enhancement complete!")
except ImportError: except ImportError:
logger.error("❌ API enhancement not available. Install: pip install anthropic") logger.error(
"❌ API enhancement not available. Install: pip install anthropic"
)
logger.info("💡 Tip: Use --enhance-local instead (no API key needed)") logger.info("💡 Tip: Use --enhance-local instead (no API key needed)")
logger.info(f"\n✅ Success! Skill created at: {skill_dir}/") logger.info(f"\n✅ Success! Skill created at: {skill_dir}/")

View File

@@ -92,7 +92,9 @@ class GuideEnhancer:
self.client = anthropic.Anthropic(api_key=self.api_key) self.client = anthropic.Anthropic(api_key=self.api_key)
logger.info("✨ GuideEnhancer initialized in API mode") logger.info("✨ GuideEnhancer initialized in API mode")
else: else:
logger.warning("⚠️ API mode requested but anthropic library not available or no API key") logger.warning(
"⚠️ API mode requested but anthropic library not available or no API key"
)
self.mode = "none" self.mode = "none"
elif self.mode == "local": elif self.mode == "local":
# Check if claude CLI is available # Check if claude CLI is available
@@ -133,7 +135,9 @@ class GuideEnhancer:
def _check_claude_cli(self) -> bool: def _check_claude_cli(self) -> bool:
"""Check if Claude Code CLI is available.""" """Check if Claude Code CLI is available."""
try: try:
result = subprocess.run(["claude", "--version"], capture_output=True, text=True, timeout=5) result = subprocess.run(
["claude", "--version"], capture_output=True, text=True, timeout=5
)
return result.returncode == 0 return result.returncode == 0
except (FileNotFoundError, subprocess.TimeoutExpired): except (FileNotFoundError, subprocess.TimeoutExpired):
return False return False
@@ -251,7 +255,9 @@ class GuideEnhancer:
try: try:
data = json.loads(response) data = json.loads(response)
return [ return [
PrerequisiteItem(name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", "")) PrerequisiteItem(
name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", "")
)
for item in data.get("prerequisites_detailed", []) for item in data.get("prerequisites_detailed", [])
] ]
except (json.JSONDecodeError, KeyError) as e: except (json.JSONDecodeError, KeyError) as e:
@@ -345,7 +351,9 @@ class GuideEnhancer:
try: try:
response = self.client.messages.create( response = self.client.messages.create(
model="claude-sonnet-4-20250514", max_tokens=max_tokens, messages=[{"role": "user", "content": prompt}] model="claude-sonnet-4-20250514",
max_tokens=max_tokens,
messages=[{"role": "user", "content": prompt}],
) )
return response.content[0].text return response.content[0].text
except Exception as e: except Exception as e:
@@ -690,7 +698,11 @@ IMPORTANT: Return ONLY valid JSON.
# Prerequisites # Prerequisites
if "prerequisites_detailed" in data: if "prerequisites_detailed" in data:
enhanced["prerequisites_detailed"] = [ enhanced["prerequisites_detailed"] = [
PrerequisiteItem(name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", "")) PrerequisiteItem(
name=item.get("name", ""),
why=item.get("why", ""),
setup=item.get("setup", ""),
)
for item in data["prerequisites_detailed"] for item in data["prerequisites_detailed"]
] ]

View File

@@ -140,7 +140,9 @@ class GuideCollection:
return { return {
"total_guides": self.total_guides, "total_guides": self.total_guides,
"guides_by_complexity": self.guides_by_complexity, "guides_by_complexity": self.guides_by_complexity,
"guides_by_use_case": {k: [g.to_dict() for g in v] for k, v in self.guides_by_use_case.items()}, "guides_by_use_case": {
k: [g.to_dict() for g in v] for k, v in self.guides_by_use_case.items()
},
"guides": [g.to_dict() for g in self.guides], "guides": [g.to_dict() for g in self.guides],
} }
@@ -224,7 +226,10 @@ class WorkflowAnalyzer:
steps.append( steps.append(
WorkflowStep( WorkflowStep(
step_number=step_num, code=step_code, description=description, verification=verification step_number=step_num,
code=step_code,
description=description,
verification=verification,
) )
) )
step_num += 1 step_num += 1
@@ -253,7 +258,9 @@ class WorkflowAnalyzer:
step_code = "\n".join(current_step) step_code = "\n".join(current_step)
description = self._infer_description_from_code(step_code) description = self._infer_description_from_code(step_code)
steps.append(WorkflowStep(step_number=step_num, code=step_code, description=description)) steps.append(
WorkflowStep(step_number=step_num, code=step_code, description=description)
)
step_num += 1 step_num += 1
current_step = [] current_step = []
continue continue
@@ -264,7 +271,9 @@ class WorkflowAnalyzer:
if current_step: if current_step:
step_code = "\n".join(current_step) step_code = "\n".join(current_step)
description = self._infer_description_from_code(step_code) description = self._infer_description_from_code(step_code)
steps.append(WorkflowStep(step_number=step_num, code=step_code, description=description)) steps.append(
WorkflowStep(step_number=step_num, code=step_code, description=description)
)
return steps return steps
@@ -400,7 +409,9 @@ class WorkflowAnalyzer:
class WorkflowGrouper: class WorkflowGrouper:
"""Group related workflows into coherent guides""" """Group related workflows into coherent guides"""
def group_workflows(self, workflows: list[dict], strategy: str = "ai-tutorial-group") -> dict[str, list[dict]]: def group_workflows(
self, workflows: list[dict], strategy: str = "ai-tutorial-group"
) -> dict[str, list[dict]]:
""" """
Group workflows using specified strategy. Group workflows using specified strategy.
@@ -854,7 +865,9 @@ class HowToGuideBuilder:
if not workflows: if not workflows:
logger.warning("No workflow examples found!") logger.warning("No workflow examples found!")
return GuideCollection(total_guides=0, guides_by_complexity={}, guides_by_use_case={}, guides=[]) return GuideCollection(
total_guides=0, guides_by_complexity={}, guides_by_use_case={}, guides=[]
)
# Group workflows # Group workflows
grouped_workflows = self.grouper.group_workflows(workflows, grouping_strategy) grouped_workflows = self.grouper.group_workflows(workflows, grouping_strategy)
@@ -914,7 +927,9 @@ class HowToGuideBuilder:
# Extract source files # Extract source files
source_files = [w.get("file_path", "") for w in workflows] source_files = [w.get("file_path", "") for w in workflows]
source_files = [f"{Path(f).name}:{w.get('line_start', 0)}" for f, w in zip(source_files, workflows)] source_files = [
f"{Path(f).name}:{w.get('line_start', 0)}" for f, w in zip(source_files, workflows)
]
# Create guide # Create guide
guide = HowToGuide( guide = HowToGuide(
@@ -1126,9 +1141,13 @@ Grouping Strategies:
""", """,
) )
parser.add_argument("input", nargs="?", help="Input: directory with test files OR test_examples.json file") parser.add_argument(
"input", nargs="?", help="Input: directory with test files OR test_examples.json file"
)
parser.add_argument("--input", dest="input_file", help="Input JSON file with test examples (from C3.2)") parser.add_argument(
"--input", dest="input_file", help="Input JSON file with test examples (from C3.2)"
)
parser.add_argument( parser.add_argument(
"--output", "--output",
@@ -1145,7 +1164,9 @@ Grouping Strategies:
parser.add_argument("--no-ai", action="store_true", help="Disable AI enhancement") parser.add_argument("--no-ai", action="store_true", help="Disable AI enhancement")
parser.add_argument("--json-output", action="store_true", help="Output JSON summary instead of markdown files") parser.add_argument(
"--json-output", action="store_true", help="Output JSON summary instead of markdown files"
)
args = parser.parse_args() args = parser.parse_args()
@@ -1191,7 +1212,9 @@ Grouping Strategies:
builder = HowToGuideBuilder(enhance_with_ai=not args.no_ai) builder = HowToGuideBuilder(enhance_with_ai=not args.no_ai)
output_dir = Path(args.output) if not args.json_output else None output_dir = Path(args.output) if not args.json_output else None
collection = builder.build_guides_from_examples(examples, grouping_strategy=args.group_by, output_dir=output_dir) collection = builder.build_guides_from_examples(
examples, grouping_strategy=args.group_by, output_dir=output_dir
)
# Output results # Output results
if args.json_output: if args.json_output:

View File

@@ -366,11 +366,17 @@ Supported agents:
parser.add_argument("skill_directory", help="Path to skill directory (e.g., output/react/)") parser.add_argument("skill_directory", help="Path to skill directory (e.g., output/react/)")
parser.add_argument("--agent", required=True, help="Agent name (use 'all' to install to all agents)") parser.add_argument(
"--agent", required=True, help="Agent name (use 'all' to install to all agents)"
)
parser.add_argument("--force", action="store_true", help="Overwrite existing installation without asking") parser.add_argument(
"--force", action="store_true", help="Overwrite existing installation without asking"
)
parser.add_argument("--dry-run", action="store_true", help="Preview installation without making changes") parser.add_argument(
"--dry-run", action="store_true", help="Preview installation without making changes"
)
args = parser.parse_args() args = parser.parse_args()
@@ -442,7 +448,9 @@ Supported agents:
if args.dry_run: if args.dry_run:
print("\n🔍 DRY RUN MODE - No changes will be made\n") print("\n🔍 DRY RUN MODE - No changes will be made\n")
success, message = install_to_agent(skill_dir, agent_name, force=args.force, dry_run=args.dry_run) success, message = install_to_agent(
skill_dir, agent_name, force=args.force, dry_run=args.dry_run
)
print(message) print(message)

View File

@@ -37,6 +37,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
# Import the MCP tool function (with lazy loading) # Import the MCP tool function (with lazy loading)
try: try:
from skill_seekers.mcp.server import install_skill_tool from skill_seekers.mcp.server import install_skill_tool
MCP_AVAILABLE = True MCP_AVAILABLE = True
except ImportError: except ImportError:
MCP_AVAILABLE = False MCP_AVAILABLE = False
@@ -99,15 +100,23 @@ Phases:
) )
parser.add_argument( parser.add_argument(
"--config", required=True, help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')" "--config",
required=True,
help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')",
) )
parser.add_argument("--destination", default="output", help="Output directory for skill files (default: output/)") parser.add_argument(
"--destination",
default="output",
help="Output directory for skill files (default: output/)",
)
parser.add_argument("--no-upload", action="store_true", help="Skip automatic upload to Claude") parser.add_argument("--no-upload", action="store_true", help="Skip automatic upload to Claude")
parser.add_argument( parser.add_argument(
"--unlimited", action="store_true", help="Remove page limits during scraping (WARNING: Can take hours)" "--unlimited",
action="store_true",
help="Remove page limits during scraping (WARNING: Can take hours)",
) )
parser.add_argument("--dry-run", action="store_true", help="Preview workflow without executing") parser.add_argument("--dry-run", action="store_true", help="Preview workflow without executing")

View File

@@ -17,10 +17,15 @@ logger = logging.getLogger(__name__)
try: try:
from skill_seekers.cli.swift_patterns import SWIFT_PATTERNS from skill_seekers.cli.swift_patterns import SWIFT_PATTERNS
except ImportError as e: except ImportError as e:
logger.warning("Swift language detection patterns unavailable. Swift code detection will be disabled. Error: %s", e) logger.warning(
"Swift language detection patterns unavailable. Swift code detection will be disabled. Error: %s",
e,
)
SWIFT_PATTERNS: dict[str, list[tuple[str, int]]] = {} SWIFT_PATTERNS: dict[str, list[tuple[str, int]]] = {}
except Exception as e: except Exception as e:
logger.error("Failed to load Swift patterns due to unexpected error: %s. Swift detection disabled.", e) logger.error(
"Failed to load Swift patterns due to unexpected error: %s. Swift detection disabled.", e
)
SWIFT_PATTERNS: dict[str, list[tuple[str, int]]] = {} SWIFT_PATTERNS: dict[str, list[tuple[str, int]]] = {}
# Verify Swift patterns were loaded correctly # Verify Swift patterns were loaded correctly
@@ -35,7 +40,8 @@ elif "swift" not in SWIFT_PATTERNS:
) )
else: else:
logger.info( logger.info(
"Swift patterns loaded successfully: %d patterns for language detection", len(SWIFT_PATTERNS.get("swift", [])) "Swift patterns loaded successfully: %d patterns for language detection",
len(SWIFT_PATTERNS.get("swift", [])),
) )
# Comprehensive language patterns with weighted confidence scoring # Comprehensive language patterns with weighted confidence scoring
@@ -473,7 +479,8 @@ class LanguageDetector:
self._pattern_cache[lang] = compiled_patterns self._pattern_cache[lang] = compiled_patterns
else: else:
logger.warning( logger.warning(
"No valid patterns compiled for language '%s'. Detection for this language is disabled.", lang "No valid patterns compiled for language '%s'. Detection for this language is disabled.",
lang,
) )
def detect_from_html(self, elem, code: str) -> tuple[str, float]: def detect_from_html(self, elem, code: str) -> tuple[str, float]:

View File

@@ -98,7 +98,9 @@ class LlmsTxtDownloader:
print(f" Retrying in {delay}s...") print(f" Retrying in {delay}s...")
time.sleep(delay) time.sleep(delay)
else: else:
print(f"❌ Failed to download {self.url} after {self.max_retries} attempts: {e}") print(
f"❌ Failed to download {self.url} after {self.max_retries} attempts: {e}"
)
return None return None
return None return None

View File

@@ -135,7 +135,11 @@ class LlmsTxtParser:
headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE) headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE)
for level_markers, text in headings: for level_markers, text in headings:
page["headings"].append( page["headings"].append(
{"level": f"h{len(level_markers)}", "text": text.strip(), "id": text.lower().replace(" ", "-")} {
"level": f"h{len(level_markers)}",
"text": text.strip(),
"id": text.lower().replace(" ", "-"),
}
) )
# Remove code blocks from content for plain text # Remove code blocks from content for plain text

View File

@@ -66,52 +66,79 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
parser.add_argument("--version", action="version", version="%(prog)s 2.7.0") parser.add_argument("--version", action="version", version="%(prog)s 2.7.0")
subparsers = parser.add_subparsers( subparsers = parser.add_subparsers(
dest="command", title="commands", description="Available Skill Seekers commands", help="Command to run" dest="command",
title="commands",
description="Available Skill Seekers commands",
help="Command to run",
) )
# === config subcommand === # === config subcommand ===
config_parser = subparsers.add_parser( config_parser = subparsers.add_parser(
"config", help="Configure GitHub tokens, API keys, and settings", description="Interactive configuration wizard" "config",
help="Configure GitHub tokens, API keys, and settings",
description="Interactive configuration wizard",
)
config_parser.add_argument(
"--github", action="store_true", help="Go directly to GitHub token setup"
)
config_parser.add_argument(
"--api-keys", action="store_true", help="Go directly to API keys setup"
)
config_parser.add_argument(
"--show", action="store_true", help="Show current configuration and exit"
) )
config_parser.add_argument("--github", action="store_true", help="Go directly to GitHub token setup")
config_parser.add_argument("--api-keys", action="store_true", help="Go directly to API keys setup")
config_parser.add_argument("--show", action="store_true", help="Show current configuration and exit")
config_parser.add_argument("--test", action="store_true", help="Test connections and exit") config_parser.add_argument("--test", action="store_true", help="Test connections and exit")
# === scrape subcommand === # === scrape subcommand ===
scrape_parser = subparsers.add_parser( scrape_parser = subparsers.add_parser(
"scrape", help="Scrape documentation website", description="Scrape documentation website and generate skill" "scrape",
help="Scrape documentation website",
description="Scrape documentation website and generate skill",
) )
scrape_parser.add_argument("--config", help="Config JSON file") scrape_parser.add_argument("--config", help="Config JSON file")
scrape_parser.add_argument("--name", help="Skill name") scrape_parser.add_argument("--name", help="Skill name")
scrape_parser.add_argument("--url", help="Documentation URL") scrape_parser.add_argument("--url", help="Documentation URL")
scrape_parser.add_argument("--description", help="Skill description") scrape_parser.add_argument("--description", help="Skill description")
scrape_parser.add_argument("--skip-scrape", action="store_true", help="Skip scraping, use cached data") scrape_parser.add_argument(
"--skip-scrape", action="store_true", help="Skip scraping, use cached data"
)
scrape_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)") scrape_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
scrape_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)") scrape_parser.add_argument(
"--enhance-local", action="store_true", help="AI enhancement (local)"
)
scrape_parser.add_argument("--dry-run", action="store_true", help="Dry run mode") scrape_parser.add_argument("--dry-run", action="store_true", help="Dry run mode")
scrape_parser.add_argument("--async", dest="async_mode", action="store_true", help="Use async scraping") scrape_parser.add_argument(
"--async", dest="async_mode", action="store_true", help="Use async scraping"
)
scrape_parser.add_argument("--workers", type=int, help="Number of async workers") scrape_parser.add_argument("--workers", type=int, help="Number of async workers")
# === github subcommand === # === github subcommand ===
github_parser = subparsers.add_parser( github_parser = subparsers.add_parser(
"github", help="Scrape GitHub repository", description="Scrape GitHub repository and generate skill" "github",
help="Scrape GitHub repository",
description="Scrape GitHub repository and generate skill",
) )
github_parser.add_argument("--config", help="Config JSON file") github_parser.add_argument("--config", help="Config JSON file")
github_parser.add_argument("--repo", help="GitHub repo (owner/repo)") github_parser.add_argument("--repo", help="GitHub repo (owner/repo)")
github_parser.add_argument("--name", help="Skill name") github_parser.add_argument("--name", help="Skill name")
github_parser.add_argument("--description", help="Skill description") github_parser.add_argument("--description", help="Skill description")
github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)") github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
github_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)") github_parser.add_argument(
"--enhance-local", action="store_true", help="AI enhancement (local)"
)
github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance") github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance")
github_parser.add_argument( github_parser.add_argument(
"--non-interactive", action="store_true", help="Non-interactive mode (fail fast on rate limits)" "--non-interactive",
action="store_true",
help="Non-interactive mode (fail fast on rate limits)",
) )
github_parser.add_argument("--profile", type=str, help="GitHub profile name from config") github_parser.add_argument("--profile", type=str, help="GitHub profile name from config")
# === pdf subcommand === # === pdf subcommand ===
pdf_parser = subparsers.add_parser( pdf_parser = subparsers.add_parser(
"pdf", help="Extract from PDF file", description="Extract content from PDF and generate skill" "pdf",
help="Extract from PDF file",
description="Extract content from PDF and generate skill",
) )
pdf_parser.add_argument("--config", help="Config JSON file") pdf_parser.add_argument("--config", help="Config JSON file")
pdf_parser.add_argument("--pdf", help="PDF file path") pdf_parser.add_argument("--pdf", help="PDF file path")
@@ -138,7 +165,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
enhance_parser.add_argument("skill_directory", help="Skill directory path") enhance_parser.add_argument("skill_directory", help="Skill directory path")
enhance_parser.add_argument("--background", action="store_true", help="Run in background") enhance_parser.add_argument("--background", action="store_true", help="Run in background")
enhance_parser.add_argument("--daemon", action="store_true", help="Run as daemon") enhance_parser.add_argument("--daemon", action="store_true", help="Run as daemon")
enhance_parser.add_argument("--no-force", action="store_true", help="Disable force mode (enable confirmations)") enhance_parser.add_argument(
"--no-force", action="store_true", help="Disable force mode (enable confirmations)"
)
enhance_parser.add_argument("--timeout", type=int, default=600, help="Timeout in seconds") enhance_parser.add_argument("--timeout", type=int, default=600, help="Timeout in seconds")
# === enhance-status subcommand === # === enhance-status subcommand ===
@@ -148,13 +177,19 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
description="Monitor background enhancement processes", description="Monitor background enhancement processes",
) )
enhance_status_parser.add_argument("skill_directory", help="Skill directory path") enhance_status_parser.add_argument("skill_directory", help="Skill directory path")
enhance_status_parser.add_argument("--watch", "-w", action="store_true", help="Watch in real-time") enhance_status_parser.add_argument(
"--watch", "-w", action="store_true", help="Watch in real-time"
)
enhance_status_parser.add_argument("--json", action="store_true", help="JSON output") enhance_status_parser.add_argument("--json", action="store_true", help="JSON output")
enhance_status_parser.add_argument("--interval", type=int, default=2, help="Watch interval in seconds") enhance_status_parser.add_argument(
"--interval", type=int, default=2, help="Watch interval in seconds"
)
# === package subcommand === # === package subcommand ===
package_parser = subparsers.add_parser( package_parser = subparsers.add_parser(
"package", help="Package skill into .zip file", description="Package skill directory into uploadable .zip" "package",
help="Package skill into .zip file",
description="Package skill directory into uploadable .zip",
) )
package_parser.add_argument("skill_directory", help="Skill directory path") package_parser.add_argument("skill_directory", help="Skill directory path")
package_parser.add_argument("--no-open", action="store_true", help="Don't open output folder") package_parser.add_argument("--no-open", action="store_true", help="Don't open output folder")
@@ -162,7 +197,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
# === upload subcommand === # === upload subcommand ===
upload_parser = subparsers.add_parser( upload_parser = subparsers.add_parser(
"upload", help="Upload skill to Claude", description="Upload .zip file to Claude via Anthropic API" "upload",
help="Upload skill to Claude",
description="Upload .zip file to Claude via Anthropic API",
) )
upload_parser.add_argument("zip_file", help=".zip file to upload") upload_parser.add_argument("zip_file", help=".zip file to upload")
upload_parser.add_argument("--api-key", help="Anthropic API key") upload_parser.add_argument("--api-key", help="Anthropic API key")
@@ -183,17 +220,26 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
help="Extract usage examples from test files", help="Extract usage examples from test files",
description="Analyze test files to extract real API usage patterns", description="Analyze test files to extract real API usage patterns",
) )
test_examples_parser.add_argument("directory", nargs="?", help="Directory containing test files")
test_examples_parser.add_argument("--file", help="Single test file to analyze")
test_examples_parser.add_argument("--language", help="Filter by programming language (python, javascript, etc.)")
test_examples_parser.add_argument( test_examples_parser.add_argument(
"--min-confidence", type=float, default=0.5, help="Minimum confidence threshold (0.0-1.0, default: 0.5)" "directory", nargs="?", help="Directory containing test files"
)
test_examples_parser.add_argument("--file", help="Single test file to analyze")
test_examples_parser.add_argument(
"--language", help="Filter by programming language (python, javascript, etc.)"
)
test_examples_parser.add_argument(
"--min-confidence",
type=float,
default=0.5,
help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
) )
test_examples_parser.add_argument( test_examples_parser.add_argument(
"--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)" "--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)"
) )
test_examples_parser.add_argument("--json", action="store_true", help="Output JSON format") test_examples_parser.add_argument("--json", action="store_true", help="Output JSON format")
test_examples_parser.add_argument("--markdown", action="store_true", help="Output Markdown format") test_examples_parser.add_argument(
"--markdown", action="store_true", help="Output Markdown format"
)
# === install-agent subcommand === # === install-agent subcommand ===
install_agent_parser = subparsers.add_parser( install_agent_parser = subparsers.add_parser(
@@ -201,9 +247,13 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
help="Install skill to AI agent directories", help="Install skill to AI agent directories",
description="Copy skill to agent-specific installation directories", description="Copy skill to agent-specific installation directories",
) )
install_agent_parser.add_argument("skill_directory", help="Skill directory path (e.g., output/react/)")
install_agent_parser.add_argument( install_agent_parser.add_argument(
"--agent", required=True, help="Agent name (claude, cursor, vscode, amp, goose, opencode, all)" "skill_directory", help="Skill directory path (e.g., output/react/)"
)
install_agent_parser.add_argument(
"--agent",
required=True,
help="Agent name (claude, cursor, vscode, amp, goose, opencode, all)",
) )
install_agent_parser.add_argument( install_agent_parser.add_argument(
"--force", action="store_true", help="Overwrite existing installation without asking" "--force", action="store_true", help="Overwrite existing installation without asking"
@@ -219,18 +269,32 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
description="One-command skill installation (AI enhancement MANDATORY)", description="One-command skill installation (AI enhancement MANDATORY)",
) )
install_parser.add_argument( install_parser.add_argument(
"--config", required=True, help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')" "--config",
required=True,
help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')",
)
install_parser.add_argument(
"--destination", default="output", help="Output directory (default: output/)"
)
install_parser.add_argument(
"--no-upload", action="store_true", help="Skip automatic upload to Claude"
)
install_parser.add_argument(
"--unlimited", action="store_true", help="Remove page limits during scraping"
)
install_parser.add_argument(
"--dry-run", action="store_true", help="Preview workflow without executing"
) )
install_parser.add_argument("--destination", default="output", help="Output directory (default: output/)")
install_parser.add_argument("--no-upload", action="store_true", help="Skip automatic upload to Claude")
install_parser.add_argument("--unlimited", action="store_true", help="Remove page limits during scraping")
install_parser.add_argument("--dry-run", action="store_true", help="Preview workflow without executing")
# === resume subcommand === # === resume subcommand ===
resume_parser = subparsers.add_parser( resume_parser = subparsers.add_parser(
"resume", help="Resume interrupted scraping job", description="Continue from saved progress checkpoint" "resume",
help="Resume interrupted scraping job",
description="Continue from saved progress checkpoint",
)
resume_parser.add_argument(
"job_id", nargs="?", help="Job ID to resume (or use --list to see available jobs)"
) )
resume_parser.add_argument("job_id", nargs="?", help="Job ID to resume (or use --list to see available jobs)")
resume_parser.add_argument("--list", action="store_true", help="List all resumable jobs") resume_parser.add_argument("--list", action="store_true", help="List all resumable jobs")
resume_parser.add_argument("--clean", action="store_true", help="Clean up old progress files") resume_parser.add_argument("--clean", action="store_true", help="Clean up old progress files")

View File

@@ -38,7 +38,9 @@ logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def categorize_issues_by_topic(problems: list[dict], solutions: list[dict], topics: list[str]) -> dict[str, list[dict]]: def categorize_issues_by_topic(
problems: list[dict], solutions: list[dict], topics: list[str]
) -> dict[str, list[dict]]:
""" """
Categorize GitHub issues by topic keywords. Categorize GitHub issues by topic keywords.
@@ -85,7 +87,10 @@ def categorize_issues_by_topic(problems: list[dict], solutions: list[dict], topi
def generate_hybrid_content( def generate_hybrid_content(
api_data: dict, github_docs: dict | None, github_insights: dict | None, conflicts: list[Conflict] api_data: dict,
github_docs: dict | None,
github_insights: dict | None,
conflicts: list[Conflict],
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Generate hybrid content combining API data with GitHub context. Generate hybrid content combining API data with GitHub context.
@@ -133,7 +138,11 @@ def generate_hybrid_content(
hybrid["github_context"]["top_labels"] = github_insights.get("top_labels", []) hybrid["github_context"]["top_labels"] = github_insights.get("top_labels", [])
# Add conflict summary # Add conflict summary
hybrid["conflict_summary"] = {"total_conflicts": len(conflicts), "by_type": {}, "by_severity": {}} hybrid["conflict_summary"] = {
"total_conflicts": len(conflicts),
"by_type": {},
"by_severity": {},
}
for conflict in conflicts: for conflict in conflicts:
# Count by type # Count by type
@@ -159,7 +168,9 @@ def generate_hybrid_content(
return hybrid return hybrid
def _match_issues_to_apis(apis: dict[str, dict], problems: list[dict], solutions: list[dict]) -> dict[str, list[dict]]: def _match_issues_to_apis(
apis: dict[str, dict], problems: list[dict], solutions: list[dict]
) -> dict[str, list[dict]]:
""" """
Match GitHub issues to specific APIs by keyword matching. Match GitHub issues to specific APIs by keyword matching.
@@ -651,7 +662,12 @@ read -p "Press Enter when merge is complete..."
# Open new terminal with Claude Code # Open new terminal with Claude Code
# Try different terminal emulators # Try different terminal emulators
terminals = [["x-terminal-emulator", "-e"], ["gnome-terminal", "--"], ["xterm", "-e"], ["konsole", "-e"]] terminals = [
["x-terminal-emulator", "-e"],
["gnome-terminal", "--"],
["xterm", "-e"],
["konsole", "-e"],
]
for terminal_cmd in terminals: for terminal_cmd in terminals:
try: try:
@@ -735,7 +751,9 @@ def merge_sources(
if github_streams: if github_streams:
logger.info("GitHub streams available for multi-layer merge") logger.info("GitHub streams available for multi-layer merge")
if github_streams.docs_stream: if github_streams.docs_stream:
logger.info(f" - Docs stream: README, {len(github_streams.docs_stream.docs_files)} docs files") logger.info(
f" - Docs stream: README, {len(github_streams.docs_stream.docs_files)} docs files"
)
if github_streams.insights_stream: if github_streams.insights_stream:
problems = len(github_streams.insights_stream.common_problems) problems = len(github_streams.insights_stream.common_problems)
solutions = len(github_streams.insights_stream.known_solutions) solutions = len(github_streams.insights_stream.known_solutions)
@@ -766,7 +784,11 @@ if __name__ == "__main__":
parser.add_argument("github_data", help="Path to GitHub data JSON") parser.add_argument("github_data", help="Path to GitHub data JSON")
parser.add_argument("--output", "-o", default="merged_data.json", help="Output file path") parser.add_argument("--output", "-o", default="merged_data.json", help="Output file path")
parser.add_argument( parser.add_argument(
"--mode", "-m", choices=["rule-based", "claude-enhanced"], default="rule-based", help="Merge mode" "--mode",
"-m",
choices=["rule-based", "claude-enhanced"],
default="rule-based",
help="Merge mode",
) )
args = parser.parse_args() args = parser.parse_args()

View File

@@ -17,12 +17,22 @@ from pathlib import Path
# Import utilities # Import utilities
try: try:
from quality_checker import SkillQualityChecker, print_report from quality_checker import SkillQualityChecker, print_report
from utils import format_file_size, open_folder, print_upload_instructions, validate_skill_directory from utils import (
format_file_size,
open_folder,
print_upload_instructions,
validate_skill_directory,
)
except ImportError: except ImportError:
# If running from different directory, add cli to path # If running from different directory, add cli to path
sys.path.insert(0, str(Path(__file__).parent)) sys.path.insert(0, str(Path(__file__).parent))
from quality_checker import SkillQualityChecker, print_report from quality_checker import SkillQualityChecker, print_report
from utils import format_file_size, open_folder, print_upload_instructions, validate_skill_directory from utils import (
format_file_size,
open_folder,
print_upload_instructions,
validate_skill_directory,
)
def package_skill(skill_dir, open_folder_after=True, skip_quality_check=False, target="claude"): def package_skill(skill_dir, open_folder_after=True, skip_quality_check=False, target="claude"):
@@ -135,9 +145,13 @@ Examples:
parser.add_argument("skill_dir", help="Path to skill directory (e.g., output/react/)") parser.add_argument("skill_dir", help="Path to skill directory (e.g., output/react/)")
parser.add_argument("--no-open", action="store_true", help="Do not open the output folder after packaging") parser.add_argument(
"--no-open", action="store_true", help="Do not open the output folder after packaging"
)
parser.add_argument("--skip-quality-check", action="store_true", help="Skip quality checks before packaging") parser.add_argument(
"--skip-quality-check", action="store_true", help="Skip quality checks before packaging"
)
parser.add_argument( parser.add_argument(
"--target", "--target",
@@ -147,7 +161,9 @@ Examples:
) )
parser.add_argument( parser.add_argument(
"--upload", action="store_true", help="Automatically upload after packaging (requires platform API key)" "--upload",
action="store_true",
help="Automatically upload after packaging (requires platform API key)",
) )
args = parser.parse_args() args = parser.parse_args()

View File

@@ -135,7 +135,9 @@ class BasePatternDetector:
# Default: no deep detection # Default: no deep detection
return None return None
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None: def detect_full(
self, class_sig, all_classes: list, file_content: str
) -> PatternInstance | None:
""" """
Full detection using behavioral analysis. Full detection using behavioral analysis.
@@ -150,7 +152,9 @@ class BasePatternDetector:
# Default: no full detection # Default: no full detection
return None return None
def detect(self, class_sig, all_classes: list, file_content: str | None = None) -> PatternInstance | None: def detect(
self, class_sig, all_classes: list, file_content: str | None = None
) -> PatternInstance | None:
""" """
Detect pattern based on configured depth. Detect pattern based on configured depth.
@@ -273,7 +277,9 @@ class PatternRecognizer:
for class_sig in class_sigs: for class_sig in class_sigs:
for detector in self.detectors: for detector in self.detectors:
pattern = detector.detect( pattern = detector.detect(
class_sig=class_sig, all_classes=class_sigs, file_content=content if self.depth == "full" else None class_sig=class_sig,
all_classes=class_sigs,
file_content=content if self.depth == "full" else None,
) )
if pattern: if pattern:
@@ -327,7 +333,9 @@ class PatternRecognizer:
params = [] params = []
for param in method.get("parameters", []): for param in method.get("parameters", []):
param_obj = SimpleNamespace( param_obj = SimpleNamespace(
name=param.get("name", ""), type_hint=param.get("type_hint"), default=param.get("default") name=param.get("name", ""),
type_hint=param.get("type_hint"),
default=param.get("default"),
) )
params.append(param_obj) params.append(param_obj)
@@ -397,7 +405,14 @@ class SingletonDetector(BasePatternDetector):
confidence = 0.0 confidence = 0.0
# Check for instance method (getInstance, instance, get_instance, etc.) # Check for instance method (getInstance, instance, get_instance, etc.)
instance_methods = ["getInstance", "instance", "get_instance", "Instance", "GetInstance", "INSTANCE"] instance_methods = [
"getInstance",
"instance",
"get_instance",
"Instance",
"GetInstance",
"INSTANCE",
]
has_instance_method = False has_instance_method = False
for method in class_sig.methods: for method in class_sig.methods:
@@ -438,7 +453,9 @@ class SingletonDetector(BasePatternDetector):
# Fallback to surface detection # Fallback to surface detection
return self.detect_surface(class_sig, all_classes) return self.detect_surface(class_sig, all_classes)
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None: def detect_full(
self, class_sig, all_classes: list, file_content: str
) -> PatternInstance | None:
""" """
Full behavioral analysis for Singleton. Full behavioral analysis for Singleton.
@@ -767,7 +784,9 @@ class StrategyDetector(BasePatternDetector):
siblings = [ siblings = [
cls.name cls.name
for cls in all_classes for cls in all_classes
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name if cls.base_classes
and base_class in cls.base_classes
and cls.name != class_sig.name
] ]
if siblings: if siblings:
@@ -885,7 +904,9 @@ class DecoratorDetector(BasePatternDetector):
siblings = [ siblings = [
cls.name cls.name
for cls in all_classes for cls in all_classes
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name if cls.base_classes
and base_class in cls.base_classes
and cls.name != class_sig.name
] ]
if siblings: if siblings:
@@ -898,7 +919,10 @@ class DecoratorDetector(BasePatternDetector):
# Check if takes object parameter (not just self) # Check if takes object parameter (not just self)
if len(init_method.parameters) > 1: # More than just 'self' if len(init_method.parameters) > 1: # More than just 'self'
param_names = [p.name for p in init_method.parameters if p.name != "self"] param_names = [p.name for p in init_method.parameters if p.name != "self"]
if any(name in ["wrapped", "component", "inner", "obj", "target"] for name in param_names): if any(
name in ["wrapped", "component", "inner", "obj", "target"]
for name in param_names
):
evidence.append(f"Takes wrapped object in constructor: {param_names}") evidence.append(f"Takes wrapped object in constructor: {param_names}")
confidence += 0.4 confidence += 0.4
@@ -969,7 +993,8 @@ class BuilderDetector(BasePatternDetector):
# Check for build/create terminal method # Check for build/create terminal method
terminal_methods = ["build", "create", "execute", "construct", "make"] terminal_methods = ["build", "create", "execute", "construct", "make"]
has_terminal = any( has_terminal = any(
m.name.lower() in terminal_methods or m.name.lower().startswith("build") for m in class_sig.methods m.name.lower() in terminal_methods or m.name.lower().startswith("build")
for m in class_sig.methods
) )
if has_terminal: if has_terminal:
@@ -979,7 +1004,9 @@ class BuilderDetector(BasePatternDetector):
# Check for setter methods (with_, set_, add_) # Check for setter methods (with_, set_, add_)
setter_prefixes = ["with", "set", "add", "configure"] setter_prefixes = ["with", "set", "add", "configure"]
setter_count = sum( setter_count = sum(
1 for m in class_sig.methods if any(m.name.lower().startswith(prefix) for prefix in setter_prefixes) 1
for m in class_sig.methods
if any(m.name.lower().startswith(prefix) for prefix in setter_prefixes)
) )
if setter_count >= 3: if setter_count >= 3:
@@ -1006,7 +1033,9 @@ class BuilderDetector(BasePatternDetector):
# Fallback to surface # Fallback to surface
return self.detect_surface(class_sig, all_classes) return self.detect_surface(class_sig, all_classes)
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None: def detect_full(
self, class_sig, all_classes: list, file_content: str
) -> PatternInstance | None:
"""Full behavioral analysis for Builder""" """Full behavioral analysis for Builder"""
# Start with deep detection # Start with deep detection
pattern = self.detect_deep(class_sig, all_classes) pattern = self.detect_deep(class_sig, all_classes)
@@ -1186,7 +1215,9 @@ class CommandDetector(BasePatternDetector):
has_execute = any(m.name.lower() in execute_methods for m in class_sig.methods) has_execute = any(m.name.lower() in execute_methods for m in class_sig.methods)
if has_execute: if has_execute:
method_name = next(m.name for m in class_sig.methods if m.name.lower() in execute_methods) method_name = next(
m.name for m in class_sig.methods if m.name.lower() in execute_methods
)
evidence.append(f"Has execute method: {method_name}()") evidence.append(f"Has execute method: {method_name}()")
confidence += 0.5 confidence += 0.5
@@ -1299,7 +1330,9 @@ class TemplateMethodDetector(BasePatternDetector):
] ]
hook_methods = [ hook_methods = [
m.name for m in class_sig.methods if any(keyword in m.name.lower() for keyword in hook_keywords) m.name
for m in class_sig.methods
if any(keyword in m.name.lower() for keyword in hook_keywords)
] ]
if len(hook_methods) >= 2: if len(hook_methods) >= 2:
@@ -1307,7 +1340,11 @@ class TemplateMethodDetector(BasePatternDetector):
confidence += 0.3 confidence += 0.3
# Check for abstract methods (no implementation or pass/raise) # Check for abstract methods (no implementation or pass/raise)
abstract_methods = [m.name for m in class_sig.methods if m.name.startswith("_") or "abstract" in m.name.lower()] abstract_methods = [
m.name
for m in class_sig.methods
if m.name.startswith("_") or "abstract" in m.name.lower()
]
if abstract_methods: if abstract_methods:
evidence.append(f"Has abstract methods: {', '.join(abstract_methods[:2])}") evidence.append(f"Has abstract methods: {', '.join(abstract_methods[:2])}")
@@ -1383,7 +1420,8 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
# Check for handle/process method # Check for handle/process method
handle_methods = ["handle", "process", "execute", "filter", "middleware"] handle_methods = ["handle", "process", "execute", "filter", "middleware"]
has_handle = any( has_handle = any(
m.name.lower() in handle_methods or m.name.lower().startswith("handle") for m in class_sig.methods m.name.lower() in handle_methods or m.name.lower().startswith("handle")
for m in class_sig.methods
) )
if has_handle: if has_handle:
@@ -1405,7 +1443,8 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
# Check for set_next() method # Check for set_next() method
has_set_next = any( has_set_next = any(
"next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower()) for m in class_sig.methods "next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower())
for m in class_sig.methods
) )
if has_set_next: if has_set_next:
@@ -1419,7 +1458,9 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
siblings = [ siblings = [
cls.name cls.name
for cls in all_classes for cls in all_classes
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name if cls.base_classes
and base_class in cls.base_classes
and cls.name != class_sig.name
] ]
if siblings and has_next_ref: if siblings and has_next_ref:
@@ -1625,16 +1666,22 @@ Supported Languages:
""", """,
) )
parser.add_argument("--file", action="append", help="Source file to analyze (can be specified multiple times)") parser.add_argument(
"--file", action="append", help="Source file to analyze (can be specified multiple times)"
)
parser.add_argument("--directory", help="Directory to analyze (analyzes all source files)") parser.add_argument("--directory", help="Directory to analyze (analyzes all source files)")
parser.add_argument("--output", help="Output directory for results (default: current directory)") parser.add_argument(
"--output", help="Output directory for results (default: current directory)"
)
parser.add_argument( parser.add_argument(
"--depth", "--depth",
choices=["surface", "deep", "full"], choices=["surface", "deep", "full"],
default="deep", default="deep",
help="Detection depth: surface (fast), deep (default), full (thorough)", help="Detection depth: surface (fast), deep (default), full (thorough)",
) )
parser.add_argument("--json", action="store_true", help="Output JSON format instead of human-readable") parser.add_argument(
"--json", action="store_true", help="Output JSON format instead of human-readable"
)
parser.add_argument("--verbose", action="store_true", help="Enable verbose output") parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
args = parser.parse_args() args = parser.parse_args()
@@ -1697,7 +1744,9 @@ Supported Languages:
if not args.json and args.verbose: if not args.json and args.verbose:
print(f"\n{file_path}:") print(f"\n{file_path}:")
for pattern in report.patterns: for pattern in report.patterns:
print(f" [{pattern.pattern_type}] {pattern.class_name} (confidence: {pattern.confidence:.2f})") print(
f" [{pattern.pattern_type}] {pattern.class_name} (confidence: {pattern.confidence:.2f})"
)
except Exception as e: except Exception as e:
if args.verbose: if args.verbose:
@@ -1737,11 +1786,15 @@ Supported Languages:
pattern_counts = {} pattern_counts = {}
for report in all_reports: for report in all_reports:
for pattern in report.patterns: for pattern in report.patterns:
pattern_counts[pattern.pattern_type] = pattern_counts.get(pattern.pattern_type, 0) + 1 pattern_counts[pattern.pattern_type] = (
pattern_counts.get(pattern.pattern_type, 0) + 1
)
if pattern_counts: if pattern_counts:
print("Pattern Summary:") print("Pattern Summary:")
for pattern_type, count in sorted(pattern_counts.items(), key=lambda x: x[1], reverse=True): for pattern_type, count in sorted(
pattern_counts.items(), key=lambda x: x[1], reverse=True
):
print(f" {pattern_type}: {count}") print(f" {pattern_type}: {count}")
print() print()

View File

@@ -196,7 +196,9 @@ class PDFExtractor:
"col_count": len(tab.extract()[0]) if tab.extract() else 0, "col_count": len(tab.extract()[0]) if tab.extract() else 0,
} }
tables.append(table_data) tables.append(table_data)
self.log(f" Found table {idx}: {table_data['row_count']}x{table_data['col_count']}") self.log(
f" Found table {idx}: {table_data['row_count']}x{table_data['col_count']}"
)
except Exception as e: except Exception as e:
self.log(f" Table extraction failed: {e}") self.log(f" Table extraction failed: {e}")
@@ -294,7 +296,9 @@ class PDFExtractor:
issues.append("May be natural language, not code") issues.append("May be natural language, not code")
# Check code/comment ratio # Check code/comment ratio
comment_lines = sum(1 for line in code.split("\n") if line.strip().startswith(("#", "//", "/*", "*", "--"))) comment_lines = sum(
1 for line in code.split("\n") if line.strip().startswith(("#", "//", "/*", "*", "--"))
)
total_lines = len([l for l in code.split("\n") if l.strip()]) total_lines = len([l for l in code.split("\n") if l.strip()])
if total_lines > 0 and comment_lines / total_lines > 0.7: if total_lines > 0 and comment_lines / total_lines > 0.7:
issues.append("Mostly comments") issues.append("Mostly comments")
@@ -501,11 +505,17 @@ class PDFExtractor:
# Common code patterns that span multiple lines # Common code patterns that span multiple lines
patterns = [ patterns = [
# Function definitions # Function definitions
(r"((?:def|function|func|fn|public|private)\s+\w+\s*\([^)]*\)\s*[{:]?[^}]*[}]?)", "function"), (
r"((?:def|function|func|fn|public|private)\s+\w+\s*\([^)]*\)\s*[{:]?[^}]*[}]?)",
"function",
),
# Class definitions # Class definitions
(r"(class\s+\w+[^{]*\{[^}]*\})", "class"), (r"(class\s+\w+[^{]*\{[^}]*\})", "class"),
# Import statements block # Import statements block
(r"((?:import|require|use|include)[^\n]+(?:\n(?:import|require|use|include)[^\n]+)*)", "imports"), (
r"((?:import|require|use|include)[^\n]+(?:\n(?:import|require|use|include)[^\n]+)*)",
"imports",
),
] ]
for pattern, block_type in patterns: for pattern, block_type in patterns:
@@ -628,7 +638,15 @@ class PDFExtractor:
""" """
if self.chunk_size == 0: if self.chunk_size == 0:
# No chunking - return all pages as one chunk # No chunking - return all pages as one chunk
return [{"chunk_number": 1, "start_page": 1, "end_page": len(pages), "pages": pages, "chapter_title": None}] return [
{
"chunk_number": 1,
"start_page": 1,
"end_page": len(pages),
"pages": pages,
"chapter_title": None,
}
]
chunks = [] chunks = []
current_chunk = [] current_chunk = []
@@ -812,7 +830,9 @@ class PDFExtractor:
code_samples = [c for c in code_samples if c["quality_score"] >= self.min_quality] code_samples = [c for c in code_samples if c["quality_score"] >= self.min_quality]
filtered_count = code_samples_before - len(code_samples) filtered_count = code_samples_before - len(code_samples)
if filtered_count > 0: if filtered_count > 0:
self.log(f" Filtered out {filtered_count} low-quality code blocks (min_quality={self.min_quality})") self.log(
f" Filtered out {filtered_count} low-quality code blocks (min_quality={self.min_quality})"
)
# Sort by quality score (highest first) # Sort by quality score (highest first)
code_samples.sort(key=lambda x: x["quality_score"], reverse=True) code_samples.sort(key=lambda x: x["quality_score"], reverse=True)
@@ -891,7 +911,9 @@ class PDFExtractor:
# Show feature status # Show feature status
if self.use_ocr: if self.use_ocr:
status = "✅ enabled" if TESSERACT_AVAILABLE else "⚠️ not available (install pytesseract)" status = (
"✅ enabled" if TESSERACT_AVAILABLE else "⚠️ not available (install pytesseract)"
)
print(f" OCR: {status}") print(f" OCR: {status}")
if self.extract_tables: if self.extract_tables:
print(" Table extraction: ✅ enabled") print(" Table extraction: ✅ enabled")
@@ -905,7 +927,9 @@ class PDFExtractor:
# Extract each page (with parallel processing - Priority 3) # Extract each page (with parallel processing - Priority 3)
if self.parallel and CONCURRENT_AVAILABLE and len(self.doc) > 5: if self.parallel and CONCURRENT_AVAILABLE and len(self.doc) > 5:
print(f"🚀 Extracting {len(self.doc)} pages in parallel ({self.max_workers} workers)...") print(
f"🚀 Extracting {len(self.doc)} pages in parallel ({self.max_workers} workers)..."
)
with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
page_numbers = list(range(len(self.doc))) page_numbers = list(range(len(self.doc)))
self.pages = list(executor.map(self.extract_page, page_numbers)) self.pages = list(executor.map(self.extract_page, page_numbers))
@@ -962,7 +986,11 @@ class PDFExtractor:
for chunk in chunks: for chunk in chunks:
if chunk["chapter_title"]: if chunk["chapter_title"]:
chapters.append( chapters.append(
{"title": chunk["chapter_title"], "start_page": chunk["start_page"], "end_page": chunk["end_page"]} {
"title": chunk["chapter_title"],
"start_page": chunk["start_page"],
"end_page": chunk["end_page"],
}
) )
result = { result = {
@@ -1042,12 +1070,21 @@ Examples:
parser.add_argument("-o", "--output", help="Output JSON file path (default: print to stdout)") parser.add_argument("-o", "--output", help="Output JSON file path (default: print to stdout)")
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output") parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
parser.add_argument("--chunk-size", type=int, default=10, help="Pages per chunk (0 = no chunking, default: 10)")
parser.add_argument("--no-merge", action="store_true", help="Disable merging code blocks across pages")
parser.add_argument( parser.add_argument(
"--min-quality", type=float, default=0.0, help="Minimum code quality score (0-10, default: 0 = no filtering)" "--chunk-size", type=int, default=10, help="Pages per chunk (0 = no chunking, default: 10)"
)
parser.add_argument(
"--no-merge", action="store_true", help="Disable merging code blocks across pages"
)
parser.add_argument(
"--min-quality",
type=float,
default=0.0,
help="Minimum code quality score (0-10, default: 0 = no filtering)",
)
parser.add_argument(
"--extract-images", action="store_true", help="Extract images to files (NEW in B1.5)"
) )
parser.add_argument("--extract-images", action="store_true", help="Extract images to files (NEW in B1.5)")
parser.add_argument( parser.add_argument(
"--image-dir", "--image-dir",
type=str, type=str,
@@ -1062,12 +1099,22 @@ Examples:
) )
# Advanced features (Priority 2 & 3) # Advanced features (Priority 2 & 3)
parser.add_argument("--ocr", action="store_true", help="Use OCR for scanned PDFs (requires pytesseract)") parser.add_argument(
"--ocr", action="store_true", help="Use OCR for scanned PDFs (requires pytesseract)"
)
parser.add_argument("--password", type=str, default=None, help="Password for encrypted PDF") parser.add_argument("--password", type=str, default=None, help="Password for encrypted PDF")
parser.add_argument("--extract-tables", action="store_true", help="Extract tables from PDF (Priority 2)") parser.add_argument(
parser.add_argument("--parallel", action="store_true", help="Process pages in parallel (Priority 3)") "--extract-tables", action="store_true", help="Extract tables from PDF (Priority 2)"
parser.add_argument("--workers", type=int, default=None, help="Number of parallel workers (default: CPU count)") )
parser.add_argument("--no-cache", action="store_true", help="Disable caching of expensive operations") parser.add_argument(
"--parallel", action="store_true", help="Process pages in parallel (Priority 3)"
)
parser.add_argument(
"--workers", type=int, default=None, help="Number of parallel workers (default: CPU count)"
)
parser.add_argument(
"--no-cache", action="store_true", help="Disable caching of expensive operations"
)
args = parser.parse_args() args = parser.parse_args()

View File

@@ -54,7 +54,11 @@ def infer_description_from_pdf(pdf_metadata: dict = None, name: str = "") -> str
return f"Use when working with {title.lower()}" return f"Use when working with {title.lower()}"
# Improved fallback # Improved fallback
return f"Use when referencing {name} documentation" if name else "Use when referencing this documentation" return (
f"Use when referencing {name} documentation"
if name
else "Use when referencing this documentation"
)
class PDFToSkillConverter: class PDFToSkillConverter:
@@ -65,7 +69,9 @@ class PDFToSkillConverter:
self.name = config["name"] self.name = config["name"]
self.pdf_path = config.get("pdf_path", "") self.pdf_path = config.get("pdf_path", "")
# Set initial description (will be improved after extraction if metadata available) # Set initial description (will be improved after extraction if metadata available)
self.description = config.get("description", f"Use when referencing {self.name} documentation") self.description = config.get(
"description", f"Use when referencing {self.name} documentation"
)
# Paths # Paths
self.skill_dir = f"output/{self.name}" self.skill_dir = f"output/{self.name}"
@@ -151,7 +157,10 @@ class PDFToSkillConverter:
if isinstance(first_value, list) and first_value and isinstance(first_value[0], dict): if isinstance(first_value, list) and first_value and isinstance(first_value[0], dict):
# Already categorized - convert to expected format # Already categorized - convert to expected format
for cat_key, pages in self.categories.items(): for cat_key, pages in self.categories.items():
categorized[cat_key] = {"title": cat_key.replace("_", " ").title(), "pages": pages} categorized[cat_key] = {
"title": cat_key.replace("_", " ").title(),
"pages": pages,
}
else: else:
# Keyword-based categorization # Keyword-based categorization
# Initialize categories # Initialize categories
@@ -171,7 +180,8 @@ class PDFToSkillConverter:
score = sum( score = sum(
1 1
for kw in keywords for kw in keywords
if isinstance(kw, str) and (kw.lower() in text or kw.lower() in headings_text) if isinstance(kw, str)
and (kw.lower() in text or kw.lower() in headings_text)
) )
else: else:
score = 0 score = 0
@@ -490,7 +500,13 @@ class PDFToSkillConverter:
for keyword in pattern_keywords: for keyword in pattern_keywords:
if keyword in heading_text: if keyword in heading_text:
page_num = page.get("page_number", 0) page_num = page.get("page_number", 0)
patterns.append({"type": keyword.title(), "heading": heading.get("text", ""), "page": page_num}) patterns.append(
{
"type": keyword.title(),
"heading": heading.get("text", ""),
"page": page_num,
}
)
break # Only add once per heading break # Only add once per heading
if not patterns: if not patterns:
@@ -526,7 +542,8 @@ class PDFToSkillConverter:
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Convert PDF documentation to Claude skill", formatter_class=argparse.RawDescriptionHelpFormatter description="Convert PDF documentation to Claude skill",
formatter_class=argparse.RawDescriptionHelpFormatter,
) )
parser.add_argument("--config", help="PDF config JSON file") parser.add_argument("--config", help="PDF config JSON file")
@@ -548,7 +565,10 @@ def main():
elif args.from_json: elif args.from_json:
# Build from extracted JSON # Build from extracted JSON
name = Path(args.from_json).stem.replace("_extracted", "") name = Path(args.from_json).stem.replace("_extracted", "")
config = {"name": name, "description": args.description or f"Use when referencing {name} documentation"} config = {
"name": name,
"description": args.description or f"Use when referencing {name} documentation",
}
converter = PDFToSkillConverter(config) converter = PDFToSkillConverter(config)
converter.load_extracted_data(args.from_json) converter.load_extracted_data(args.from_json)
converter.build_skill() converter.build_skill()
@@ -561,7 +581,12 @@ def main():
"name": args.name, "name": args.name,
"pdf_path": args.pdf, "pdf_path": args.pdf,
"description": args.description or f"Use when referencing {args.name} documentation", "description": args.description or f"Use when referencing {args.name} documentation",
"extract_options": {"chunk_size": 10, "min_quality": 5.0, "extract_images": True, "min_image_size": 100}, "extract_options": {
"chunk_size": 10,
"min_quality": 5.0,
"extract_images": True,
"min_image_size": 100,
},
} }
# Create converter # Create converter

View File

@@ -138,7 +138,9 @@ class SkillQualityChecker:
# Check references directory exists # Check references directory exists
if not self.references_dir.exists(): if not self.references_dir.exists():
self.report.add_warning( self.report.add_warning(
"structure", "references/ directory not found - skill may be incomplete", str(self.references_dir) "structure",
"references/ directory not found - skill may be incomplete",
str(self.references_dir),
) )
elif not list(self.references_dir.rglob("*.md")): elif not list(self.references_dir.rglob("*.md")):
self.report.add_warning( self.report.add_warning(
@@ -197,7 +199,9 @@ class SkillQualityChecker:
if sections < 4: if sections < 4:
self.report.add_warning( self.report.add_warning(
"enhancement", f"Only {sections} sections found - SKILL.md may be too basic", "SKILL.md" "enhancement",
f"Only {sections} sections found - SKILL.md may be too basic",
"SKILL.md",
) )
else: else:
self.report.add_info("enhancement", f"✓ Found {sections} sections", "SKILL.md") self.report.add_info("enhancement", f"✓ Found {sections} sections", "SKILL.md")
@@ -211,7 +215,9 @@ class SkillQualityChecker:
# Check YAML frontmatter # Check YAML frontmatter
if not content.startswith("---"): if not content.startswith("---"):
self.report.add_error("content", "Missing YAML frontmatter - SKILL.md must start with ---", "SKILL.md", 1) self.report.add_error(
"content", "Missing YAML frontmatter - SKILL.md must start with ---", "SKILL.md", 1
)
else: else:
# Extract frontmatter # Extract frontmatter
try: try:
@@ -221,26 +227,38 @@ class SkillQualityChecker:
# Check for required fields # Check for required fields
if "name:" not in frontmatter: if "name:" not in frontmatter:
self.report.add_error("content", 'Missing "name:" field in YAML frontmatter', "SKILL.md", 2) self.report.add_error(
"content", 'Missing "name:" field in YAML frontmatter', "SKILL.md", 2
)
# Check for description # Check for description
if "description:" in frontmatter: if "description:" in frontmatter:
self.report.add_info("content", "✓ YAML frontmatter includes description", "SKILL.md") self.report.add_info(
"content", "✓ YAML frontmatter includes description", "SKILL.md"
)
else: else:
self.report.add_error("content", "Invalid YAML frontmatter format", "SKILL.md", 1) self.report.add_error(
"content", "Invalid YAML frontmatter format", "SKILL.md", 1
)
except Exception as e: except Exception as e:
self.report.add_error("content", f"Error parsing YAML frontmatter: {e}", "SKILL.md", 1) self.report.add_error(
"content", f"Error parsing YAML frontmatter: {e}", "SKILL.md", 1
)
# Check code block language tags # Check code block language tags
code_blocks_without_lang = re.findall(r"```\n[^`]", content) code_blocks_without_lang = re.findall(r"```\n[^`]", content)
if code_blocks_without_lang: if code_blocks_without_lang:
self.report.add_warning( self.report.add_warning(
"content", f"Found {len(code_blocks_without_lang)} code blocks without language tags", "SKILL.md" "content",
f"Found {len(code_blocks_without_lang)} code blocks without language tags",
"SKILL.md",
) )
# Check for "When to Use" section # Check for "When to Use" section
if "when to use" not in content.lower(): if "when to use" not in content.lower():
self.report.add_warning("content", 'Missing "When to Use This Skill" section', "SKILL.md") self.report.add_warning(
"content", 'Missing "When to Use This Skill" section', "SKILL.md"
)
else: else:
self.report.add_info("content", '✓ Found "When to Use" section', "SKILL.md") self.report.add_info("content", '✓ Found "When to Use" section', "SKILL.md")
@@ -248,7 +266,9 @@ class SkillQualityChecker:
if self.references_dir.exists(): if self.references_dir.exists():
ref_files = list(self.references_dir.rglob("*.md")) ref_files = list(self.references_dir.rglob("*.md"))
if ref_files: if ref_files:
self.report.add_info("content", f"✓ Found {len(ref_files)} reference files", "references/") self.report.add_info(
"content", f"✓ Found {len(ref_files)} reference files", "references/"
)
# Check if references are mentioned in SKILL.md # Check if references are mentioned in SKILL.md
mentioned_refs = 0 mentioned_refs = 0
@@ -258,7 +278,9 @@ class SkillQualityChecker:
if mentioned_refs == 0: if mentioned_refs == 0:
self.report.add_warning( self.report.add_warning(
"content", "Reference files exist but none are mentioned in SKILL.md", "SKILL.md" "content",
"Reference files exist but none are mentioned in SKILL.md",
"SKILL.md",
) )
def _check_links(self): def _check_links(self):
@@ -295,7 +317,9 @@ class SkillQualityChecker:
if links: if links:
internal_links = [l for t, l in links if not l.startswith("http")] internal_links = [l for t, l in links if not l.startswith("http")]
if internal_links: if internal_links:
self.report.add_info("links", f"✓ All {len(internal_links)} internal links are valid", "SKILL.md") self.report.add_info(
"links", f"✓ All {len(internal_links)} internal links are valid", "SKILL.md"
)
def _check_skill_completeness(self): def _check_skill_completeness(self):
"""Check skill completeness based on best practices. """Check skill completeness based on best practices.
@@ -316,9 +340,13 @@ class SkillQualityChecker:
r"requirements?:", r"requirements?:",
r"make\s+sure\s+you\s+have", r"make\s+sure\s+you\s+have",
] ]
has_grounding = any(re.search(pattern, content, re.IGNORECASE) for pattern in grounding_patterns) has_grounding = any(
re.search(pattern, content, re.IGNORECASE) for pattern in grounding_patterns
)
if has_grounding: if has_grounding:
self.report.add_info("completeness", "✓ Found verification/prerequisites section", "SKILL.md") self.report.add_info(
"completeness", "✓ Found verification/prerequisites section", "SKILL.md"
)
else: else:
self.report.add_info( self.report.add_info(
"completeness", "completeness",
@@ -334,12 +362,18 @@ class SkillQualityChecker:
r"error\s+handling", r"error\s+handling",
r"when\s+things\s+go\s+wrong", r"when\s+things\s+go\s+wrong",
] ]
has_error_handling = any(re.search(pattern, content, re.IGNORECASE) for pattern in error_patterns) has_error_handling = any(
re.search(pattern, content, re.IGNORECASE) for pattern in error_patterns
)
if has_error_handling: if has_error_handling:
self.report.add_info("completeness", "✓ Found error handling/troubleshooting guidance", "SKILL.md") self.report.add_info(
"completeness", "✓ Found error handling/troubleshooting guidance", "SKILL.md"
)
else: else:
self.report.add_info( self.report.add_info(
"completeness", "Consider adding troubleshooting section for common issues", "SKILL.md" "completeness",
"Consider adding troubleshooting section for common issues",
"SKILL.md",
) )
# Check for workflow steps (numbered or sequential indicators) # Check for workflow steps (numbered or sequential indicators)
@@ -351,10 +385,14 @@ class SkillQualityChecker:
r"finally,?\s+", r"finally,?\s+",
r"next,?\s+", r"next,?\s+",
] ]
steps_found = sum(1 for pattern in step_patterns if re.search(pattern, content, re.IGNORECASE)) steps_found = sum(
1 for pattern in step_patterns if re.search(pattern, content, re.IGNORECASE)
)
if steps_found >= 3: if steps_found >= 3:
self.report.add_info( self.report.add_info(
"completeness", f"✓ Found clear workflow indicators ({steps_found} step markers)", "SKILL.md" "completeness",
f"✓ Found clear workflow indicators ({steps_found} step markers)",
"SKILL.md",
) )
elif steps_found > 0: elif steps_found > 0:
self.report.add_info( self.report.add_info(
@@ -451,7 +489,9 @@ Examples:
parser.add_argument("--verbose", "-v", action="store_true", help="Show all info messages") parser.add_argument("--verbose", "-v", action="store_true", help="Show all info messages")
parser.add_argument("--strict", action="store_true", help="Exit with error code if any warnings or errors found") parser.add_argument(
"--strict", action="store_true", help="Exit with error code if any warnings or errors found"
)
args = parser.parse_args() args = parser.parse_args()

View File

@@ -179,7 +179,12 @@ class RateLimitHandler:
reset_time = datetime.fromtimestamp(reset_timestamp) if reset_timestamp else None reset_time = datetime.fromtimestamp(reset_timestamp) if reset_timestamp else None
return {"limit": limit, "remaining": remaining, "reset_timestamp": reset_timestamp, "reset_time": reset_time} return {
"limit": limit,
"remaining": remaining,
"reset_timestamp": reset_timestamp,
"reset_time": reset_time,
}
def get_rate_limit_info(self) -> dict[str, Any]: def get_rate_limit_info(self) -> dict[str, Any]:
""" """

View File

@@ -136,7 +136,9 @@ def print_summary(result):
# Category breakdown # Category breakdown
if hasattr(result, "test_results"): if hasattr(result, "test_results"):
print(f"\n{ColoredTextTestResult.BOLD}Test Breakdown by Category:{ColoredTextTestResult.RESET}") print(
f"\n{ColoredTextTestResult.BOLD}Test Breakdown by Category:{ColoredTextTestResult.RESET}"
)
categories = {} categories = {}
for status, test in result.test_results: for status, test in result.test_results:
@@ -164,11 +166,16 @@ def main():
import argparse import argparse
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Run tests for Skill Seeker", formatter_class=argparse.RawDescriptionHelpFormatter description="Run tests for Skill Seeker",
formatter_class=argparse.RawDescriptionHelpFormatter,
) )
parser.add_argument("--suite", "-s", type=str, help="Run specific test suite (config, features, integration)") parser.add_argument(
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output (show each test)") "--suite", "-s", type=str, help="Run specific test suite (config, features, integration)"
)
parser.add_argument(
"--verbose", "-v", action="store_true", help="Verbose output (show each test)"
)
parser.add_argument("--quiet", "-q", action="store_true", help="Quiet output (minimal output)") parser.add_argument("--quiet", "-q", action="store_true", help="Quiet output (minimal output)")
parser.add_argument("--failfast", "-f", action="store_true", help="Stop on first failure") parser.add_argument("--failfast", "-f", action="store_true", help="Stop on first failure")
parser.add_argument("--list", "-l", action="store_true", help="List all available tests") parser.add_argument("--list", "-l", action="store_true", help="List all available tests")
@@ -188,7 +195,9 @@ def main():
# Discover or load specific suite # Discover or load specific suite
if args.suite: if args.suite:
print(f"Running test suite: {ColoredTextTestResult.BLUE}{args.suite}{ColoredTextTestResult.RESET}\n") print(
f"Running test suite: {ColoredTextTestResult.BLUE}{args.suite}{ColoredTextTestResult.RESET}\n"
)
suite = run_specific_suite(args.suite) suite = run_specific_suite(args.suite)
if suite is None: if suite is None:
return 1 return 1

View File

@@ -50,7 +50,9 @@ class ConfigSplitter:
print(" Single source unified config - no splitting needed") print(" Single source unified config - no splitting needed")
return "none" return "none"
else: else:
print(f" Multi-source unified config ({num_sources} sources) - source split recommended") print(
f" Multi-source unified config ({num_sources} sources) - source split recommended"
)
return "source" return "source"
# For unified configs, only 'source' and 'none' strategies are valid # For unified configs, only 'source' and 'none' strategies are valid
elif self.strategy in ["source", "none"]: elif self.strategy in ["source", "none"]:
@@ -77,7 +79,9 @@ class ConfigSplitter:
print(f" Medium documentation ({max_pages} pages) - category split recommended") print(f" Medium documentation ({max_pages} pages) - category split recommended")
return "category" return "category"
elif "categories" in self.config and len(self.config["categories"]) >= 3: elif "categories" in self.config and len(self.config["categories"]) >= 3:
print(f" Large documentation ({max_pages} pages) - router + categories recommended") print(
f" Large documentation ({max_pages} pages) - router + categories recommended"
)
return "router" return "router"
else: else:
print(f" Large documentation ({max_pages} pages) - size-based split") print(f" Large documentation ({max_pages} pages) - size-based split")
@@ -227,7 +231,9 @@ class ConfigSplitter:
"max_pages": 500, # Router only needs overview pages "max_pages": 500, # Router only needs overview pages
"_router": True, "_router": True,
"_sub_skills": [cfg["name"] for cfg in sub_configs], "_sub_skills": [cfg["name"] for cfg in sub_configs],
"_routing_keywords": {cfg["name"]: list(cfg.get("categories", {}).keys()) for cfg in sub_configs}, "_routing_keywords": {
cfg["name"]: list(cfg.get("categories", {}).keys()) for cfg in sub_configs
},
} }
return router_config return router_config
@@ -333,11 +339,17 @@ Config Types:
help="Splitting strategy (default: auto)", help="Splitting strategy (default: auto)",
) )
parser.add_argument("--target-pages", type=int, default=5000, help="Target pages per skill (default: 5000)") parser.add_argument(
"--target-pages", type=int, default=5000, help="Target pages per skill (default: 5000)"
)
parser.add_argument("--output-dir", help="Output directory for configs (default: same as input)") parser.add_argument(
"--output-dir", help="Output directory for configs (default: same as input)"
)
parser.add_argument("--dry-run", action="store_true", help="Show what would be created without saving files") parser.add_argument(
"--dry-run", action="store_true", help="Show what would be created without saving files"
)
args = parser.parse_args() args = parser.parse_args()

View File

@@ -538,9 +538,13 @@ def _validate_patterns(patterns: dict[str, list[tuple[str, int]]]) -> None:
raise ValueError(f"Pattern {i} for '{lang}' is not a (regex, weight) tuple: {item}") raise ValueError(f"Pattern {i} for '{lang}' is not a (regex, weight) tuple: {item}")
pattern, weight = item pattern, weight = item
if not isinstance(pattern, str): if not isinstance(pattern, str):
raise ValueError(f"Pattern {i} for '{lang}': regex must be a string, got {type(pattern).__name__}") raise ValueError(
f"Pattern {i} for '{lang}': regex must be a string, got {type(pattern).__name__}"
)
if not isinstance(weight, int) or weight < 1 or weight > 5: if not isinstance(weight, int) or weight < 1 or weight > 5:
raise ValueError(f"Pattern {i} for '{lang}': weight must be int 1-5, got {weight!r}") raise ValueError(
f"Pattern {i} for '{lang}': weight must be int 1-5, got {weight!r}"
)
# Validate patterns at module load time # Validate patterns at module load time

View File

@@ -251,7 +251,9 @@ class PythonTestAnalyzer:
# Process each test method # Process each test method
for node in class_node.body: for node in class_node.body:
if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"): if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
examples.extend(self._analyze_test_body(node, file_path, imports, setup_code=setup_code)) examples.extend(
self._analyze_test_body(node, file_path, imports, setup_code=setup_code)
)
return examples return examples
@@ -283,7 +285,11 @@ class PythonTestAnalyzer:
return None return None
def _analyze_test_body( def _analyze_test_body(
self, func_node: ast.FunctionDef, file_path: str, imports: list[str], setup_code: str | None = None self,
func_node: ast.FunctionDef,
file_path: str,
imports: list[str],
setup_code: str | None = None,
) -> list[TestExample]: ) -> list[TestExample]:
"""Analyze test function body for extractable patterns""" """Analyze test function body for extractable patterns"""
examples = [] examples = []
@@ -297,7 +303,9 @@ class PythonTestAnalyzer:
# Extract different pattern categories # Extract different pattern categories
# 1. Instantiation patterns # 1. Instantiation patterns
instantiations = self._find_instantiations(func_node, file_path, docstring, setup_code, tags, imports) instantiations = self._find_instantiations(
func_node, file_path, docstring, setup_code, tags, imports
)
examples.extend(instantiations) examples.extend(instantiations)
# 2. Method calls with assertions # 2. Method calls with assertions
@@ -307,7 +315,9 @@ class PythonTestAnalyzer:
examples.extend(method_calls) examples.extend(method_calls)
# 3. Configuration dictionaries # 3. Configuration dictionaries
configs = self._find_config_dicts(func_node, file_path, docstring, setup_code, tags, imports) configs = self._find_config_dicts(
func_node, file_path, docstring, setup_code, tags, imports
)
examples.extend(configs) examples.extend(configs)
# 4. Multi-step workflows (integration tests) # 4. Multi-step workflows (integration tests)
@@ -707,7 +717,13 @@ class GenericTestAnalyzer:
return examples return examples
def _create_example( def _create_example(
self, test_name: str, category: str, code: str, language: str, file_path: str, line_number: int self,
test_name: str,
category: str,
code: str,
language: str,
file_path: str,
line_number: int,
) -> TestExample: ) -> TestExample:
"""Create TestExample from regex match""" """Create TestExample from regex match"""
return TestExample( return TestExample(
@@ -891,7 +907,9 @@ class TestExampleExtractor:
# Limit per file # Limit per file
if len(filtered_examples) > self.max_per_file: if len(filtered_examples) > self.max_per_file:
# Sort by confidence and take top N # Sort by confidence and take top N
filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[: self.max_per_file] filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[
: self.max_per_file
]
logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}") logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}")
@@ -915,7 +933,10 @@ class TestExampleExtractor:
return self.LANGUAGE_MAP.get(suffix, "Unknown") return self.LANGUAGE_MAP.get(suffix, "Unknown")
def _create_report( def _create_report(
self, examples: list[TestExample], file_path: str | None = None, directory: str | None = None self,
examples: list[TestExample],
file_path: str | None = None,
directory: str | None = None,
) -> ExampleReport: ) -> ExampleReport:
"""Create summary report from examples""" """Create summary report from examples"""
# Enhance examples with AI analysis (C3.6) # Enhance examples with AI analysis (C3.6)
@@ -932,15 +953,21 @@ class TestExampleExtractor:
# Count by category # Count by category
examples_by_category = {} examples_by_category = {}
for example in examples: for example in examples:
examples_by_category[example.category] = examples_by_category.get(example.category, 0) + 1 examples_by_category[example.category] = (
examples_by_category.get(example.category, 0) + 1
)
# Count by language # Count by language
examples_by_language = {} examples_by_language = {}
for example in examples: for example in examples:
examples_by_language[example.language] = examples_by_language.get(example.language, 0) + 1 examples_by_language[example.language] = (
examples_by_language.get(example.language, 0) + 1
)
# Calculate averages # Calculate averages
avg_complexity = sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0 avg_complexity = (
sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
)
high_value_count = sum(1 for ex in examples if ex.confidence > 0.7) high_value_count = sum(1 for ex in examples if ex.confidence > 0.7)
return ExampleReport( return ExampleReport(
@@ -983,15 +1010,25 @@ Examples:
parser.add_argument("directory", nargs="?", help="Directory containing test files") parser.add_argument("directory", nargs="?", help="Directory containing test files")
parser.add_argument("--file", help="Single test file to analyze") parser.add_argument("--file", help="Single test file to analyze")
parser.add_argument("--language", help="Filter by programming language (python, javascript, etc.)")
parser.add_argument( parser.add_argument(
"--min-confidence", type=float, default=0.5, help="Minimum confidence threshold (0.0-1.0, default: 0.5)" "--language", help="Filter by programming language (python, javascript, etc.)"
)
parser.add_argument(
"--min-confidence",
type=float,
default=0.5,
help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
)
parser.add_argument(
"--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)"
) )
parser.add_argument("--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)")
parser.add_argument("--json", action="store_true", help="Output JSON format") parser.add_argument("--json", action="store_true", help="Output JSON format")
parser.add_argument("--markdown", action="store_true", help="Output Markdown format") parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
parser.add_argument( parser.add_argument(
"--recursive", action="store_true", default=True, help="Search directory recursively (default: True)" "--recursive",
action="store_true",
default=True,
help="Search directory recursively (default: True)",
) )
args = parser.parse_args() args = parser.parse_args()

View File

@@ -21,7 +21,12 @@ def test_validate_existing_unified_configs():
"""Test that all existing unified configs are valid""" """Test that all existing unified configs are valid"""
configs_dir = Path(__file__).parent.parent / "configs" configs_dir = Path(__file__).parent.parent / "configs"
unified_configs = ["godot_unified.json", "react_unified.json", "django_unified.json", "fastapi_unified.json"] unified_configs = [
"godot_unified.json",
"react_unified.json",
"django_unified.json",
"fastapi_unified.json",
]
for config_name in unified_configs: for config_name in unified_configs:
config_path = configs_dir / config_name config_path = configs_dir / config_name
@@ -56,8 +61,18 @@ def test_create_temp_unified_config():
"description": "Test unified config", "description": "Test unified config",
"merge_mode": "rule-based", "merge_mode": "rule-based",
"sources": [ "sources": [
{"type": "documentation", "base_url": "https://example.com/docs", "extract_api": True, "max_pages": 50}, {
{"type": "github", "repo": "test/repo", "include_code": True, "code_analysis_depth": "surface"}, "type": "documentation",
"base_url": "https://example.com/docs",
"extract_api": True,
"max_pages": 50,
},
{
"type": "github",
"repo": "test/repo",
"include_code": True,
"code_analysis_depth": "surface",
},
], ],
} }

View File

@@ -69,7 +69,11 @@ class UnifiedCodebaseAnalyzer:
self.github_token = github_token or os.getenv("GITHUB_TOKEN") self.github_token = github_token or os.getenv("GITHUB_TOKEN")
def analyze( def analyze(
self, source: str, depth: str = "c3x", fetch_github_metadata: bool = True, output_dir: Path | None = None self,
source: str,
depth: str = "c3x",
fetch_github_metadata: bool = True,
output_dir: Path | None = None,
) -> AnalysisResult: ) -> AnalysisResult:
""" """
Analyze codebase with specified depth. Analyze codebase with specified depth.
@@ -123,7 +127,9 @@ class UnifiedCodebaseAnalyzer:
raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'") raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
# Build result with all streams # Build result with all streams
result = AnalysisResult(code_analysis=code_analysis, source_type="github", analysis_depth=depth) result = AnalysisResult(
code_analysis=code_analysis, source_type="github", analysis_depth=depth
)
# Add GitHub-specific data if available # Add GitHub-specific data if available
if fetch_metadata: if fetch_metadata:
@@ -168,7 +174,9 @@ class UnifiedCodebaseAnalyzer:
else: else:
raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'") raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
return AnalysisResult(code_analysis=code_analysis, source_type="local", analysis_depth=depth) return AnalysisResult(
code_analysis=code_analysis, source_type="local", analysis_depth=depth
)
def basic_analysis(self, directory: Path) -> dict: def basic_analysis(self, directory: Path) -> dict:
""" """
@@ -423,7 +431,9 @@ class UnifiedCodebaseAnalyzer:
# Only include immediate subdirectories # Only include immediate subdirectories
structure["children"].append({"name": item.name, "type": "directory"}) structure["children"].append({"name": item.name, "type": "directory"})
elif item.is_file(): elif item.is_file():
structure["children"].append({"name": item.name, "type": "file", "extension": item.suffix}) structure["children"].append(
{"name": item.name, "type": "file", "extension": item.suffix}
)
except Exception: except Exception:
pass pass

View File

@@ -406,7 +406,13 @@ class UnifiedScraper:
# Append to list instead of overwriting (multi-source support) # Append to list instead of overwriting (multi-source support)
self.scraped_data["github"].append( self.scraped_data["github"].append(
{"repo": repo, "repo_id": repo_id, "idx": idx, "data": github_data, "data_file": github_data_file} {
"repo": repo,
"repo_id": repo_id,
"idx": idx,
"data": github_data,
"data_file": github_data_file,
}
) )
# Build standalone SKILL.md for synthesis using GitHubToSkillConverter # Build standalone SKILL.md for synthesis using GitHubToSkillConverter
@@ -433,7 +439,9 @@ class UnifiedScraper:
logger.info(f"📦 Moved GitHub output to cache: {cache_github_dir}") logger.info(f"📦 Moved GitHub output to cache: {cache_github_dir}")
if os.path.exists(github_data_file_path): if os.path.exists(github_data_file_path):
cache_github_data = os.path.join(self.data_dir, f"{github_config['name']}_github_data.json") cache_github_data = os.path.join(
self.data_dir, f"{github_config['name']}_github_data.json"
)
if os.path.exists(cache_github_data): if os.path.exists(cache_github_data):
os.remove(cache_github_data) os.remove(cache_github_data)
shutil.move(github_data_file_path, cache_github_data) shutil.move(github_data_file_path, cache_github_data)
@@ -478,7 +486,13 @@ class UnifiedScraper:
# Append to list instead of overwriting # Append to list instead of overwriting
self.scraped_data["pdf"].append( self.scraped_data["pdf"].append(
{"pdf_path": pdf_path, "pdf_id": pdf_id, "idx": idx, "data": pdf_data, "data_file": pdf_data_file} {
"pdf_path": pdf_path,
"pdf_id": pdf_id,
"idx": idx,
"data": pdf_data,
"data_file": pdf_data_file,
}
) )
# Build standalone SKILL.md for synthesis # Build standalone SKILL.md for synthesis
@@ -611,12 +625,20 @@ class UnifiedScraper:
# Load C3.x outputs into memory # Load C3.x outputs into memory
c3_data = { c3_data = {
"patterns": self._load_json(temp_output / "patterns" / "detected_patterns.json"), "patterns": self._load_json(temp_output / "patterns" / "detected_patterns.json"),
"test_examples": self._load_json(temp_output / "test_examples" / "test_examples.json"), "test_examples": self._load_json(
temp_output / "test_examples" / "test_examples.json"
),
"how_to_guides": self._load_guide_collection(temp_output / "tutorials"), "how_to_guides": self._load_guide_collection(temp_output / "tutorials"),
"config_patterns": self._load_json(temp_output / "config_patterns" / "config_patterns.json"), "config_patterns": self._load_json(
"architecture": self._load_json(temp_output / "architecture" / "architectural_patterns.json"), temp_output / "config_patterns" / "config_patterns.json"
),
"architecture": self._load_json(
temp_output / "architecture" / "architectural_patterns.json"
),
"api_reference": self._load_api_reference(temp_output / "api_reference"), # C2.5 "api_reference": self._load_api_reference(temp_output / "api_reference"), # C2.5
"dependency_graph": self._load_json(temp_output / "dependencies" / "dependency_graph.json"), # C2.6 "dependency_graph": self._load_json(
temp_output / "dependencies" / "dependency_graph.json"
), # C2.6
} }
# Log summary # Log summary
@@ -769,7 +791,9 @@ class UnifiedScraper:
conflicts = conflicts_data.get("conflicts", []) conflicts = conflicts_data.get("conflicts", [])
# Build skill # Build skill
builder = UnifiedSkillBuilder(self.config, self.scraped_data, merged_data, conflicts, cache_dir=self.cache_dir) builder = UnifiedSkillBuilder(
self.config, self.scraped_data, merged_data, conflicts, cache_dir=self.cache_dir
)
builder.build() builder.build()
@@ -836,7 +860,10 @@ Examples:
parser.add_argument("--config", "-c", required=True, help="Path to unified config JSON file") parser.add_argument("--config", "-c", required=True, help="Path to unified config JSON file")
parser.add_argument( parser.add_argument(
"--merge-mode", "-m", choices=["rule-based", "claude-enhanced"], help="Override config merge mode" "--merge-mode",
"-m",
choices=["rule-based", "claude-enhanced"],
help="Override config merge mode",
) )
parser.add_argument( parser.add_argument(
"--skip-codebase-analysis", "--skip-codebase-analysis",
@@ -854,7 +881,9 @@ Examples:
for source in scraper.config.get("sources", []): for source in scraper.config.get("sources", []):
if source["type"] == "github": if source["type"] == "github":
source["enable_codebase_analysis"] = False source["enable_codebase_analysis"] = False
logger.info(f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}") logger.info(
f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}"
)
# Run scraper # Run scraper
scraper.run() scraper.run()

View File

@@ -97,7 +97,9 @@ class UnifiedSkillBuilder:
if docs_skill_path.exists(): if docs_skill_path.exists():
try: try:
skill_mds["documentation"] = docs_skill_path.read_text(encoding="utf-8") skill_mds["documentation"] = docs_skill_path.read_text(encoding="utf-8")
logger.debug(f"Loaded documentation SKILL.md ({len(skill_mds['documentation'])} chars)") logger.debug(
f"Loaded documentation SKILL.md ({len(skill_mds['documentation'])} chars)"
)
except OSError as e: except OSError as e:
logger.warning(f"Failed to read documentation SKILL.md: {e}") logger.warning(f"Failed to read documentation SKILL.md: {e}")
@@ -109,7 +111,9 @@ class UnifiedSkillBuilder:
try: try:
content = github_skill_path.read_text(encoding="utf-8") content = github_skill_path.read_text(encoding="utf-8")
github_sources.append(content) github_sources.append(content)
logger.debug(f"Loaded GitHub SKILL.md from {github_dir.name} ({len(content)} chars)") logger.debug(
f"Loaded GitHub SKILL.md from {github_dir.name} ({len(content)} chars)"
)
except OSError as e: except OSError as e:
logger.warning(f"Failed to read GitHub SKILL.md from {github_dir.name}: {e}") logger.warning(f"Failed to read GitHub SKILL.md from {github_dir.name}: {e}")
@@ -165,7 +169,23 @@ class UnifiedSkillBuilder:
current_section = line[3:].strip() current_section = line[3:].strip()
# Remove emoji and markdown formatting # Remove emoji and markdown formatting
current_section = current_section.split("](")[0] # Remove links current_section = current_section.split("](")[0] # Remove links
for emoji in ["📚", "🏗️", "⚠️", "🔧", "📖", "💡", "🎯", "📊", "🔍", "⚙️", "🧪", "📝", "🗂️", "📐", ""]: for emoji in [
"📚",
"🏗️",
"⚠️",
"🔧",
"📖",
"💡",
"🎯",
"📊",
"🔍",
"⚙️",
"🧪",
"📝",
"🗂️",
"📐",
"",
]:
current_section = current_section.replace(emoji, "").strip() current_section = current_section.replace(emoji, "").strip()
current_content = [] current_content = []
elif current_section: elif current_section:
@@ -268,7 +288,9 @@ This skill synthesizes knowledge from multiple sources:
if "Quick Reference" in github_sections: if "Quick Reference" in github_sections:
# Include GitHub's Quick Reference (contains design patterns summary) # Include GitHub's Quick Reference (contains design patterns summary)
logger.info(f"DEBUG: Including GitHub Quick Reference ({len(github_sections['Quick Reference'])} chars)") logger.info(
f"DEBUG: Including GitHub Quick Reference ({len(github_sections['Quick Reference'])} chars)"
)
content += github_sections["Quick Reference"] + "\n\n" content += github_sections["Quick Reference"] + "\n\n"
else: else:
logger.warning("DEBUG: GitHub Quick Reference section NOT FOUND!") logger.warning("DEBUG: GitHub Quick Reference section NOT FOUND!")
@@ -330,7 +352,9 @@ This skill synthesizes knowledge from multiple sources:
# Footer # Footer
content += "---\n\n" content += "---\n\n"
content += "*Synthesized from official documentation and codebase analysis by Skill Seekers*\n" content += (
"*Synthesized from official documentation and codebase analysis by Skill Seekers*\n"
)
return content return content
@@ -602,7 +626,9 @@ This skill combines knowledge from multiple sources:
# Count by type # Count by type
by_type = {} by_type = {}
for conflict in self.conflicts: for conflict in self.conflicts:
ctype = conflict.type if hasattr(conflict, "type") else conflict.get("type", "unknown") ctype = (
conflict.type if hasattr(conflict, "type") else conflict.get("type", "unknown")
)
by_type[ctype] = by_type.get(ctype, 0) + 1 by_type[ctype] = by_type.get(ctype, 0) + 1
content += "**Conflict Breakdown:**\n" content += "**Conflict Breakdown:**\n"
@@ -836,7 +862,9 @@ This skill combines knowledge from multiple sources:
source_id = doc_source.get("source_id", "unknown") source_id = doc_source.get("source_id", "unknown")
base_url = doc_source.get("base_url", "Unknown") base_url = doc_source.get("base_url", "Unknown")
total_pages = doc_source.get("total_pages", "N/A") total_pages = doc_source.get("total_pages", "N/A")
f.write(f"- [{source_id}]({source_id}/index.md) - {base_url} ({total_pages} pages)\n") f.write(
f"- [{source_id}]({source_id}/index.md) - {base_url} ({total_pages} pages)\n"
)
logger.info(f"Created documentation references ({len(docs_list)} sources)") logger.info(f"Created documentation references ({len(docs_list)} sources)")
@@ -1084,9 +1112,13 @@ This skill combines knowledge from multiple sources:
pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1 pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1
if pattern_summary: if pattern_summary:
for ptype, count in sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True): for ptype, count in sorted(
pattern_summary.items(), key=lambda x: x[1], reverse=True
):
f.write(f"- **{ptype}**: {count} instance(s)\n") f.write(f"- **{ptype}**: {count} instance(s)\n")
f.write("\n📁 See `references/codebase_analysis/patterns/` for detailed analysis.\n\n") f.write(
"\n📁 See `references/codebase_analysis/patterns/` for detailed analysis.\n\n"
)
else: else:
f.write("*No design patterns detected.*\n\n") f.write("*No design patterns detected.*\n\n")
@@ -1115,7 +1147,9 @@ This skill combines knowledge from multiple sources:
f.write("\n**Recommended Actions**:\n") f.write("\n**Recommended Actions**:\n")
for action in insights["recommended_actions"][:5]: for action in insights["recommended_actions"][:5]:
f.write(f"- {action}\n") f.write(f"- {action}\n")
f.write("\n📁 See `references/codebase_analysis/configuration/` for details.\n\n") f.write(
"\n📁 See `references/codebase_analysis/configuration/` for details.\n\n"
)
else: else:
f.write("*No configuration files detected.*\n\n") f.write("*No configuration files detected.*\n\n")
@@ -1128,7 +1162,9 @@ This skill combines knowledge from multiple sources:
f.write(f"**{len(guides)} how-to guide(s) extracted from codebase**:\n\n") f.write(f"**{len(guides)} how-to guide(s) extracted from codebase**:\n\n")
for guide in guides[:10]: # Top 10 for guide in guides[:10]: # Top 10
f.write(f"- {guide.get('title', 'Untitled Guide')}\n") f.write(f"- {guide.get('title', 'Untitled Guide')}\n")
f.write("\n📁 See `references/codebase_analysis/guides/` for detailed tutorials.\n\n") f.write(
"\n📁 See `references/codebase_analysis/guides/` for detailed tutorials.\n\n"
)
else: else:
f.write("*No workflow guides extracted.*\n\n") f.write("*No workflow guides extracted.*\n\n")
@@ -1147,11 +1183,15 @@ This skill combines knowledge from multiple sources:
if examples.get("examples_by_category"): if examples.get("examples_by_category"):
f.write("\n**By Category**:\n") f.write("\n**By Category**:\n")
for cat, count in sorted( for cat, count in sorted(
examples["examples_by_category"].items(), key=lambda x: x[1], reverse=True examples["examples_by_category"].items(),
key=lambda x: x[1],
reverse=True,
): ):
f.write(f"- {cat}: {count}\n") f.write(f"- {cat}: {count}\n")
f.write("\n📁 See `references/codebase_analysis/examples/` for code samples.\n\n") f.write(
"\n📁 See `references/codebase_analysis/examples/` for code samples.\n\n"
)
else: else:
f.write("*No test examples extracted.*\n\n") f.write("*No test examples extracted.*\n\n")
@@ -1163,13 +1203,17 @@ This skill combines knowledge from multiple sources:
dir_struct = c3_data["architecture"].get("directory_structure", {}) dir_struct = c3_data["architecture"].get("directory_structure", {})
if dir_struct: if dir_struct:
f.write("**Main Directories**:\n") f.write("**Main Directories**:\n")
for dir_name, file_count in sorted(dir_struct.items(), key=lambda x: x[1], reverse=True)[:15]: for dir_name, file_count in sorted(
dir_struct.items(), key=lambda x: x[1], reverse=True
)[:15]:
f.write(f"- `{dir_name}/`: {file_count} file(s)\n") f.write(f"- `{dir_name}/`: {file_count} file(s)\n")
f.write("\n") f.write("\n")
# Footer # Footer
f.write("---\n\n") f.write("---\n\n")
f.write("*This architecture overview was automatically generated by C3.x codebase analysis.*\n") f.write(
"*This architecture overview was automatically generated by C3.x codebase analysis.*\n"
)
f.write("*Last updated: skill build time*\n") f.write("*Last updated: skill build time*\n")
logger.info("📐 Created ARCHITECTURE.md") logger.info("📐 Created ARCHITECTURE.md")
@@ -1277,7 +1321,9 @@ This skill combines knowledge from multiple sources:
if guides: if guides:
f.write("## Available Guides\n\n") f.write("## Available Guides\n\n")
for guide in guides: for guide in guides:
f.write(f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n") f.write(
f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n"
)
f.write("\n") f.write("\n")
# Save individual guide markdown files # Save individual guide markdown files
@@ -1351,7 +1397,9 @@ This skill combines knowledge from multiple sources:
if insights: if insights:
f.write("## Overall Insights\n\n") f.write("## Overall Insights\n\n")
if insights.get("security_issues_found"): if insights.get("security_issues_found"):
f.write(f"🔐 **Security Issues**: {insights['security_issues_found']}\n\n") f.write(
f"🔐 **Security Issues**: {insights['security_issues_found']}\n\n"
)
if insights.get("recommended_actions"): if insights.get("recommended_actions"):
f.write("**Recommended Actions**:\n") f.write("**Recommended Actions**:\n")
for action in insights["recommended_actions"]: for action in insights["recommended_actions"]:
@@ -1425,7 +1473,9 @@ This skill combines knowledge from multiple sources:
top_patterns = sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True)[:3] top_patterns = sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True)[:3]
if top_patterns: if top_patterns:
content += f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n" content += (
f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n"
)
content += "\n" content += "\n"
# Add test examples summary # Add test examples summary
@@ -1449,7 +1499,9 @@ This skill combines knowledge from multiple sources:
# Add security warning if present # Add security warning if present
if c3_data["config_patterns"].get("ai_enhancements"): if c3_data["config_patterns"].get("ai_enhancements"):
insights = c3_data["config_patterns"]["ai_enhancements"].get("overall_insights", {}) insights = c3_data["config_patterns"]["ai_enhancements"].get(
"overall_insights", {}
)
security_issues = insights.get("security_issues_found", 0) security_issues = insights.get("security_issues_found", 0)
if security_issues > 0: if security_issues > 0:
content += f"- 🔐 **Security Alert**: {security_issues} issue(s) detected\n" content += f"- 🔐 **Security Alert**: {security_issues} issue(s) detected\n"
@@ -1477,7 +1529,8 @@ This skill combines knowledge from multiple sources:
medium = [ medium = [
c c
for c in self.conflicts for c in self.conflicts
if (hasattr(c, "severity") and c.severity == "medium") or c.get("severity") == "medium" if (hasattr(c, "severity") and c.severity == "medium")
or c.get("severity") == "medium"
] ]
low = [ low = [
c c
@@ -1497,9 +1550,15 @@ This skill combines knowledge from multiple sources:
for conflict in high: for conflict in high:
api_name = ( api_name = (
conflict.api_name if hasattr(conflict, "api_name") else conflict.get("api_name", "Unknown") conflict.api_name
if hasattr(conflict, "api_name")
else conflict.get("api_name", "Unknown")
)
diff = (
conflict.difference
if hasattr(conflict, "difference")
else conflict.get("difference", "N/A")
) )
diff = conflict.difference if hasattr(conflict, "difference") else conflict.get("difference", "N/A")
f.write(f"### {api_name}\n\n") f.write(f"### {api_name}\n\n")
f.write(f"**Issue**: {diff}\n\n") f.write(f"**Issue**: {diff}\n\n")
@@ -1510,9 +1569,15 @@ This skill combines knowledge from multiple sources:
for conflict in medium[:20]: # Limit to 20 for conflict in medium[:20]: # Limit to 20
api_name = ( api_name = (
conflict.api_name if hasattr(conflict, "api_name") else conflict.get("api_name", "Unknown") conflict.api_name
if hasattr(conflict, "api_name")
else conflict.get("api_name", "Unknown")
)
diff = (
conflict.difference
if hasattr(conflict, "difference")
else conflict.get("difference", "N/A")
) )
diff = conflict.difference if hasattr(conflict, "difference") else conflict.get("difference", "N/A")
f.write(f"### {api_name}\n\n") f.write(f"### {api_name}\n\n")
f.write(f"{diff}\n\n") f.write(f"{diff}\n\n")
@@ -1534,7 +1599,9 @@ if __name__ == "__main__":
config = json.load(f) config = json.load(f)
# Mock scraped data # Mock scraped data
scraped_data = {"github": {"data": {"readme": "# Test Repository", "issues": [], "releases": []}}} scraped_data = {
"github": {"data": {"readme": "# Test Repository", "issues": [], "releases": []}}
}
builder = UnifiedSkillBuilder(config, scraped_data) builder = UnifiedSkillBuilder(config, scraped_data)
builder.build() builder.build()

View File

@@ -179,7 +179,9 @@ def validate_zip_file(zip_path: str | Path) -> tuple[bool, str | None]:
return True, None return True, None
def read_reference_files(skill_dir: str | Path, max_chars: int = 100000, preview_limit: int = 40000) -> dict[str, dict]: def read_reference_files(
skill_dir: str | Path, max_chars: int = 100000, preview_limit: int = 40000
) -> dict[str, dict]:
"""Read reference files from a skill directory with enriched metadata. """Read reference files from a skill directory with enriched metadata.
This function reads markdown files from the references/ subdirectory This function reads markdown files from the references/ subdirectory
@@ -319,7 +321,10 @@ def read_reference_files(skill_dir: str | Path, max_chars: int = 100000, preview
def retry_with_backoff( def retry_with_backoff(
operation: Callable[[], T], max_attempts: int = 3, base_delay: float = 1.0, operation_name: str = "operation" operation: Callable[[], T],
max_attempts: int = 3,
base_delay: float = 1.0,
operation_name: str = "operation",
) -> T: ) -> T:
"""Retry an operation with exponential backoff. """Retry an operation with exponential backoff.
@@ -355,7 +360,12 @@ def retry_with_backoff(
if attempt < max_attempts: if attempt < max_attempts:
delay = base_delay * (2 ** (attempt - 1)) delay = base_delay * (2 ** (attempt - 1))
logger.warning( logger.warning(
"%s failed (attempt %d/%d), retrying in %.1fs: %s", operation_name, attempt, max_attempts, delay, e "%s failed (attempt %d/%d), retrying in %.1fs: %s",
operation_name,
attempt,
max_attempts,
delay,
e,
) )
time.sleep(delay) time.sleep(delay)
else: else:
@@ -368,7 +378,10 @@ def retry_with_backoff(
async def retry_with_backoff_async( async def retry_with_backoff_async(
operation: Callable[[], T], max_attempts: int = 3, base_delay: float = 1.0, operation_name: str = "operation" operation: Callable[[], T],
max_attempts: int = 3,
base_delay: float = 1.0,
operation_name: str = "operation",
) -> T: ) -> T:
"""Async version of retry_with_backoff for async operations. """Async version of retry_with_backoff for async operations.
@@ -403,7 +416,12 @@ async def retry_with_backoff_async(
if attempt < max_attempts: if attempt < max_attempts:
delay = base_delay * (2 ** (attempt - 1)) delay = base_delay * (2 ** (attempt - 1))
logger.warning( logger.warning(
"%s failed (attempt %d/%d), retrying in %.1fs: %s", operation_name, attempt, max_attempts, delay, e "%s failed (attempt %d/%d), retrying in %.1fs: %s",
operation_name,
attempt,
max_attempts,
delay,
e,
) )
await asyncio.sleep(delay) await asyncio.sleep(delay)
else: else:

View File

@@ -138,7 +138,9 @@ class AgentDetector:
return None return None
return self.AGENT_CONFIG[agent_id]["transport"] return self.AGENT_CONFIG[agent_id]["transport"]
def generate_config(self, agent_id: str, server_command: str, http_port: int | None = 3000) -> str | None: def generate_config(
self, agent_id: str, server_command: str, http_port: int | None = 3000
) -> str | None:
""" """
Generate MCP configuration for a specific agent. Generate MCP configuration for a specific agent.
@@ -282,7 +284,9 @@ def detect_agents() -> list[dict[str, str]]:
return detector.detect_agents() return detector.detect_agents()
def generate_config(agent_name: str, server_command: str = "skill-seekers mcp", http_port: int = 3000) -> str | None: def generate_config(
agent_name: str, server_command: str = "skill-seekers mcp", http_port: int = 3000
) -> str | None:
""" """
Convenience function to generate config for a specific agent. Convenience function to generate config for a specific agent.

View File

@@ -118,7 +118,8 @@ class GitConfigRepo:
) from e ) from e
elif "not found" in error_msg.lower() or "404" in error_msg: elif "not found" in error_msg.lower() or "404" in error_msg:
raise GitCommandError( raise GitCommandError(
f"Repository not found: {git_url}. Verify the URL is correct and you have access.", 128 f"Repository not found: {git_url}. Verify the URL is correct and you have access.",
128,
) from e ) from e
else: else:
raise GitCommandError(f"Failed to clone repository: {error_msg}", 128) from e raise GitCommandError(f"Failed to clone repository: {error_msg}", 128) from e

View File

@@ -139,14 +139,20 @@ try:
inputSchema={"type": "object", "properties": {}}, inputSchema={"type": "object", "properties": {}},
), ),
Tool( Tool(
name="scrape_docs", description="Scrape documentation", inputSchema={"type": "object", "properties": {}} name="scrape_docs",
description="Scrape documentation",
inputSchema={"type": "object", "properties": {}},
), ),
Tool( Tool(
name="scrape_github", name="scrape_github",
description="Scrape GitHub repository", description="Scrape GitHub repository",
inputSchema={"type": "object", "properties": {}}, inputSchema={"type": "object", "properties": {}},
), ),
Tool(name="scrape_pdf", description="Scrape PDF file", inputSchema={"type": "object", "properties": {}}), Tool(
name="scrape_pdf",
description="Scrape PDF file",
inputSchema={"type": "object", "properties": {}},
),
Tool( Tool(
name="package_skill", name="package_skill",
description="Package skill into .zip", description="Package skill into .zip",
@@ -157,9 +163,15 @@ try:
description="Upload skill to Claude", description="Upload skill to Claude",
inputSchema={"type": "object", "properties": {}}, inputSchema={"type": "object", "properties": {}},
), ),
Tool(name="install_skill", description="Install skill", inputSchema={"type": "object", "properties": {}}),
Tool( Tool(
name="split_config", description="Split large config", inputSchema={"type": "object", "properties": {}} name="install_skill",
description="Install skill",
inputSchema={"type": "object", "properties": {}},
),
Tool(
name="split_config",
description="Split large config",
inputSchema={"type": "object", "properties": {}},
), ),
Tool( Tool(
name="generate_router", name="generate_router",

View File

@@ -726,7 +726,13 @@ async def estimate_pages_tool(args: dict) -> list[TextContent]:
timeout = max(300, max_discovery // 2) # Minimum 5 minutes timeout = max(300, max_discovery // 2) # Minimum 5 minutes
# Run estimate_pages.py # Run estimate_pages.py
cmd = [sys.executable, str(CLI_DIR / "estimate_pages.py"), config_path, "--max-discovery", str(max_discovery)] cmd = [
sys.executable,
str(CLI_DIR / "estimate_pages.py"),
config_path,
"--max-discovery",
str(max_discovery),
]
progress_msg = "🔄 Estimating page count...\n" progress_msg = "🔄 Estimating page count...\n"
progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n" progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
@@ -980,7 +986,9 @@ async def validate_config_tool(args: dict) -> list[TextContent]:
try: try:
# Check if file exists # Check if file exists
if not Path(config_path).exists(): if not Path(config_path).exists():
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")] return [
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
]
# Try unified config validator first # Try unified config validator first
try: try:
@@ -1004,7 +1012,9 @@ async def validate_config_tool(args: dict) -> list[TextContent]:
result += f" Max pages: {source.get('max_pages', 'Not set')}\n" result += f" Max pages: {source.get('max_pages', 'Not set')}\n"
elif source["type"] == "github": elif source["type"] == "github":
result += f" Repo: {source.get('repo', 'N/A')}\n" result += f" Repo: {source.get('repo', 'N/A')}\n"
result += f" Code depth: {source.get('code_analysis_depth', 'surface')}\n" result += (
f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
)
elif source["type"] == "pdf": elif source["type"] == "pdf":
result += f" Path: {source.get('path', 'N/A')}\n" result += f" Path: {source.get('path', 'N/A')}\n"
@@ -1106,7 +1116,9 @@ async def generate_router_tool(args: dict) -> list[TextContent]:
config_files = glob.glob(config_pattern) config_files = glob.glob(config_pattern)
if not config_files: if not config_files:
return [TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")] return [
TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")
]
# Run generate_router.py # Run generate_router.py
cmd = [ cmd = [
@@ -1159,7 +1171,11 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
cmd.extend(["--from-json", from_json]) cmd.extend(["--from-json", from_json])
else: else:
return [TextContent(type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json")] return [
TextContent(
type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json"
)
]
# Run pdf_scraper.py with streaming (can take a while) # Run pdf_scraper.py with streaming (can take a while)
timeout = 600 # 10 minutes for PDF extraction timeout = 600 # 10 minutes for PDF extraction
@@ -1257,7 +1273,12 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
# MODE 1: Named Source (highest priority) # MODE 1: Named Source (highest priority)
if source_name: if source_name:
if not config_name: if not config_name:
return [TextContent(type="text", text="❌ Error: config_name is required when using source parameter")] return [
TextContent(
type="text",
text="❌ Error: config_name is required when using source parameter",
)
]
# Get source from registry # Get source from registry
source_manager = SourceManager() source_manager = SourceManager()
@@ -1278,7 +1299,11 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
git_repo = GitConfigRepo() git_repo = GitConfigRepo()
try: try:
repo_path = git_repo.clone_or_pull( repo_path = git_repo.clone_or_pull(
source_name=source_name, git_url=git_url, branch=branch, token=token, force_refresh=force_refresh source_name=source_name,
git_url=git_url,
branch=branch,
token=token,
force_refresh=force_refresh,
) )
except Exception as e: except Exception as e:
return [TextContent(type="text", text=f"❌ Git error: {str(e)}")] return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]
@@ -1320,7 +1345,12 @@ Next steps:
# MODE 2: Direct Git URL # MODE 2: Direct Git URL
elif git_url: elif git_url:
if not config_name: if not config_name:
return [TextContent(type="text", text="❌ Error: config_name is required when using git_url parameter")] return [
TextContent(
type="text",
text="❌ Error: config_name is required when using git_url parameter",
)
]
# Clone/pull repository # Clone/pull repository
git_repo = GitConfigRepo() git_repo = GitConfigRepo()
@@ -1418,7 +1448,9 @@ Next steps:
if tags: if tags:
result += f" Tags: {tags}\n" result += f" Tags: {tags}\n"
result += "\n💡 To download a config, use: fetch_config with config_name='<name>'\n" result += (
"\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
)
result += f"📚 API Docs: {API_BASE_URL}/docs\n" result += f"📚 API Docs: {API_BASE_URL}/docs\n"
return [TextContent(type="text", text=result)] return [TextContent(type="text", text=result)]
@@ -1426,7 +1458,10 @@ Next steps:
# Download specific config # Download specific config
if not config_name: if not config_name:
return [ return [
TextContent(type="text", text="❌ Error: Please provide config_name or set list_available=true") TextContent(
type="text",
text="❌ Error: Please provide config_name or set list_available=true",
)
] ]
# Get config details first # Get config details first
@@ -1486,11 +1521,14 @@ Next steps:
except httpx.HTTPError as e: except httpx.HTTPError as e:
return [ return [
TextContent( TextContent(
type="text", text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later." type="text",
text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later.",
) )
] ]
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
return [TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")] return [
TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")
]
except Exception as e: except Exception as e:
return [TextContent(type="text", text=f"❌ Error: {str(e)}")] return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
@@ -1575,7 +1613,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
if not dry_run: if not dry_run:
# Call fetch_config_tool directly # Call fetch_config_tool directly
fetch_result = await fetch_config_tool({"config_name": config_name, "destination": destination}) fetch_result = await fetch_config_tool(
{"config_name": config_name, "destination": destination}
)
# Parse result to extract config path # Parse result to extract config path
fetch_output = fetch_result[0].text fetch_output = fetch_result[0].text
@@ -1589,7 +1629,12 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
workflow_state["config_path"] = match.group(1).strip() workflow_state["config_path"] = match.group(1).strip()
output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}") output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}")
else: else:
return [TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Failed to fetch config")] return [
TextContent(
type="text",
text="\n".join(output_lines) + "\n\n❌ Failed to fetch config",
)
]
workflow_state["phases_completed"].append("fetch_config") workflow_state["phases_completed"].append("fetch_config")
else: else:
@@ -1614,7 +1659,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
workflow_state["skill_name"] = config.get("name", "unknown") workflow_state["skill_name"] = config.get("name", "unknown")
except Exception as e: except Exception as e:
return [ return [
TextContent(type="text", text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}") TextContent(
type="text",
text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}",
)
] ]
# Call scrape_docs_tool (does NOT include enhancement) # Call scrape_docs_tool (does NOT include enhancement)
@@ -1638,7 +1686,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
# Check for success # Check for success
if "" in scrape_output: if "" in scrape_output:
return [ return [
TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above") TextContent(
type="text",
text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above",
)
] ]
workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}" workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}"
@@ -1738,7 +1789,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
if not dry_run: if not dry_run:
if has_api_key: if has_api_key:
# Call upload_skill_tool # Call upload_skill_tool
upload_result = await upload_skill_tool({"skill_zip": workflow_state["zip_path"]}) upload_result = await upload_skill_tool(
{"skill_zip": workflow_state["zip_path"]}
)
upload_output = upload_result[0].text upload_output = upload_result[0].text
output_lines.append(upload_output) output_lines.append(upload_output)
@@ -1813,7 +1866,10 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
from github import Github, GithubException from github import Github, GithubException
except ImportError: except ImportError:
return [ return [
TextContent(type="text", text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub") TextContent(
type="text",
text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub",
)
] ]
config_path = args.get("config_path") config_path = args.get("config_path")
@@ -1826,7 +1882,9 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
if config_path: if config_path:
config_file = Path(config_path) config_file = Path(config_path)
if not config_file.exists(): if not config_file.exists():
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")] return [
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
]
with open(config_file) as f: with open(config_file) as f:
config_data = json.load(f) config_data = json.load(f)
@@ -1841,7 +1899,11 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")] return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")]
else: else:
return [TextContent(type="text", text="❌ Error: Must provide either config_path or config_json")] return [
TextContent(
type="text", text="❌ Error: Must provide either config_path or config_json"
)
]
# Use ConfigValidator for comprehensive validation # Use ConfigValidator for comprehensive validation
if ConfigValidator is None: if ConfigValidator is None:
@@ -1871,14 +1933,20 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
if not is_unified: if not is_unified:
# Legacy config - check base_url # Legacy config - check base_url
base_url = config_data.get("base_url", "") base_url = config_data.get("base_url", "")
if base_url and not (base_url.startswith("http://") or base_url.startswith("https://")): if base_url and not (
raise ValueError(f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://") base_url.startswith("http://") or base_url.startswith("https://")
):
raise ValueError(
f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://"
)
else: else:
# Unified config - check URLs in sources # Unified config - check URLs in sources
for idx, source in enumerate(config_data.get("sources", [])): for idx, source in enumerate(config_data.get("sources", [])):
if source.get("type") == "documentation": if source.get("type") == "documentation":
source_url = source.get("base_url", "") source_url = source.get("base_url", "")
if source_url and not (source_url.startswith("http://") or source_url.startswith("https://")): if source_url and not (
source_url.startswith("http://") or source_url.startswith("https://")
):
raise ValueError( raise ValueError(
f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://" f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://"
) )
@@ -1920,7 +1988,10 @@ Please fix these issues and try again.
# For legacy configs, use name-based detection # For legacy configs, use name-based detection
name_lower = config_name.lower() name_lower = config_name.lower()
category = "other" category = "other"
if any(x in name_lower for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]): if any(
x in name_lower
for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]
):
category = "web-frameworks" category = "web-frameworks"
elif any(x in name_lower for x in ["godot", "unity", "unreal"]): elif any(x in name_lower for x in ["godot", "unity", "unreal"]):
category = "game-engines" category = "game-engines"
@@ -1936,12 +2007,16 @@ Please fix these issues and try again.
if "max_pages" not in config_data: if "max_pages" not in config_data:
warnings.append("⚠️ No max_pages set - will use default (100)") warnings.append("⚠️ No max_pages set - will use default (100)")
elif config_data.get("max_pages") in (None, -1): elif config_data.get("max_pages") in (None, -1):
warnings.append("⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours") warnings.append(
"⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours"
)
else: else:
# Unified config warnings # Unified config warnings
for src in config_data.get("sources", []): for src in config_data.get("sources", []):
if src.get("type") == "documentation" and "max_pages" not in src: if src.get("type") == "documentation" and "max_pages" not in src:
warnings.append("⚠️ No max_pages set for documentation source - will use default (100)") warnings.append(
"⚠️ No max_pages set for documentation source - will use default (100)"
)
elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1): elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1):
warnings.append("⚠️ Unlimited scraping enabled for documentation source") warnings.append("⚠️ Unlimited scraping enabled for documentation source")
@@ -1996,7 +2071,9 @@ Please fix these issues and try again.
# Create issue # Create issue
issue = repo.create_issue( issue = repo.create_issue(
title=f"[CONFIG] {config_name}", body=issue_body, labels=["config-submission", "needs-review"] title=f"[CONFIG] {config_name}",
body=issue_body,
labels=["config-submission", "needs-review"],
) )
result = f"""✅ Config submitted successfully! result = f"""✅ Config submitted successfully!

View File

@@ -64,7 +64,9 @@ class SourceManager:
""" """
# Validate name # Validate name
if not name or not name.replace("-", "").replace("_", "").isalnum(): if not name or not name.replace("-", "").replace("_", "").isalnum():
raise ValueError(f"Invalid source name '{name}'. Must be alphanumeric with optional hyphens/underscores.") raise ValueError(
f"Invalid source name '{name}'. Must be alphanumeric with optional hyphens/underscores."
)
# Validate git_url # Validate git_url
if not git_url or not git_url.strip(): if not git_url or not git_url.strip():
@@ -136,7 +138,9 @@ class SourceManager:
# Not found - provide helpful error # Not found - provide helpful error
available = [s["name"] for s in registry["sources"]] available = [s["name"] for s in registry["sources"]]
raise KeyError(f"Source '{name}' not found. Available sources: {', '.join(available) if available else 'none'}") raise KeyError(
f"Source '{name}' not found. Available sources: {', '.join(available) if available else 'none'}"
)
def list_sources(self, enabled_only: bool = False) -> list[dict]: def list_sources(self, enabled_only: bool = False) -> list[dict]:
""" """

View File

@@ -169,7 +169,9 @@ async def validate_config(args: dict) -> list[TextContent]:
try: try:
# Check if file exists # Check if file exists
if not Path(config_path).exists(): if not Path(config_path).exists():
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")] return [
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
]
# Try unified config validator first # Try unified config validator first
try: try:
@@ -193,7 +195,9 @@ async def validate_config(args: dict) -> list[TextContent]:
result += f" Max pages: {source.get('max_pages', 'Not set')}\n" result += f" Max pages: {source.get('max_pages', 'Not set')}\n"
elif source["type"] == "github": elif source["type"] == "github":
result += f" Repo: {source.get('repo', 'N/A')}\n" result += f" Repo: {source.get('repo', 'N/A')}\n"
result += f" Code depth: {source.get('code_analysis_depth', 'surface')}\n" result += (
f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
)
elif source["type"] == "pdf": elif source["type"] == "pdf":
result += f" Path: {source.get('path', 'N/A')}\n" result += f" Path: {source.get('path', 'N/A')}\n"

View File

@@ -252,14 +252,18 @@ async def upload_skill_tool(args: dict) -> list[TextContent]:
except ValueError as e: except ValueError as e:
return [ return [
TextContent( TextContent(
type="text", text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai" type="text",
text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai",
) )
] ]
# Check if upload is supported # Check if upload is supported
if target == "markdown": if target == "markdown":
return [ return [
TextContent(type="text", text="❌ Markdown export does not support upload. Use the packaged file manually.") TextContent(
type="text",
text="❌ Markdown export does not support upload. Use the packaged file manually.",
)
] ]
# Run upload_skill.py with target parameter # Run upload_skill.py with target parameter
@@ -323,13 +327,18 @@ async def enhance_skill_tool(args: dict) -> list[TextContent]:
except ValueError as e: except ValueError as e:
return [ return [
TextContent( TextContent(
type="text", text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai" type="text",
text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai",
) )
] ]
# Check if enhancement is supported # Check if enhancement is supported
if not adaptor.supports_enhancement(): if not adaptor.supports_enhancement():
return [TextContent(type="text", text=f"{adaptor.PLATFORM_NAME} does not support AI enhancement")] return [
TextContent(
type="text", text=f"{adaptor.PLATFORM_NAME} does not support AI enhancement"
)
]
output_lines = [] output_lines = []
output_lines.append(f"🚀 Enhancing skill with {adaptor.PLATFORM_NAME}") output_lines.append(f"🚀 Enhancing skill with {adaptor.PLATFORM_NAME}")
@@ -373,12 +382,19 @@ async def enhance_skill_tool(args: dict) -> list[TextContent]:
if not api_key: if not api_key:
return [ return [
TextContent(type="text", text=f"{env_var} not set. Set API key or pass via api_key parameter.") TextContent(
type="text",
text=f"{env_var} not set. Set API key or pass via api_key parameter.",
)
] ]
# Validate API key # Validate API key
if not adaptor.validate_api_key(api_key): if not adaptor.validate_api_key(api_key):
return [TextContent(type="text", text=f"❌ Invalid API key format for {adaptor.PLATFORM_NAME}")] return [
TextContent(
type="text", text=f"❌ Invalid API key format for {adaptor.PLATFORM_NAME}"
)
]
output_lines.append("Calling API for enhancement...") output_lines.append("Calling API for enhancement...")
output_lines.append("") output_lines.append("")
@@ -447,7 +463,8 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
except ValueError as e: except ValueError as e:
return [ return [
TextContent( TextContent(
type="text", text=f"❌ Error: {str(e)}\n\nSupported platforms: claude, gemini, openai, markdown" type="text",
text=f"❌ Error: {str(e)}\n\nSupported platforms: claude, gemini, openai, markdown",
) )
] ]
@@ -498,7 +515,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
if not dry_run: if not dry_run:
# Call fetch_config_tool directly # Call fetch_config_tool directly
fetch_result = await fetch_config_tool({"config_name": config_name, "destination": destination}) fetch_result = await fetch_config_tool(
{"config_name": config_name, "destination": destination}
)
# Parse result to extract config path # Parse result to extract config path
fetch_output = fetch_result[0].text fetch_output = fetch_result[0].text
@@ -512,7 +531,12 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
workflow_state["config_path"] = match.group(1).strip() workflow_state["config_path"] = match.group(1).strip()
output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}") output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}")
else: else:
return [TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Failed to fetch config")] return [
TextContent(
type="text",
text="\n".join(output_lines) + "\n\n❌ Failed to fetch config",
)
]
workflow_state["phases_completed"].append("fetch_config") workflow_state["phases_completed"].append("fetch_config")
else: else:
@@ -537,7 +561,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
workflow_state["skill_name"] = config.get("name", "unknown") workflow_state["skill_name"] = config.get("name", "unknown")
except Exception as e: except Exception as e:
return [ return [
TextContent(type="text", text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}") TextContent(
type="text",
text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}",
)
] ]
# Call scrape_docs_tool (does NOT include enhancement) # Call scrape_docs_tool (does NOT include enhancement)
@@ -561,7 +588,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
# Check for success # Check for success
if "" in scrape_output: if "" in scrape_output:
return [ return [
TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above") TextContent(
type="text",
text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above",
)
] ]
workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}" workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}"
@@ -641,9 +671,13 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
else: else:
# Fallback: construct package path based on platform # Fallback: construct package path based on platform
if target == "gemini": if target == "gemini":
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}-gemini.tar.gz" workflow_state["zip_path"] = (
f"{destination}/{workflow_state['skill_name']}-gemini.tar.gz"
)
elif target == "openai": elif target == "openai":
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}-openai.zip" workflow_state["zip_path"] = (
f"{destination}/{workflow_state['skill_name']}-openai.zip"
)
else: else:
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}.zip" workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}.zip"
@@ -660,7 +694,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
pkg_ext = "zip" pkg_ext = "zip"
pkg_file = f"{destination}/{workflow_state['skill_name']}.zip" pkg_file = f"{destination}/{workflow_state['skill_name']}.zip"
output_lines.append(f" [DRY RUN] Would package to {pkg_ext} file for {adaptor.PLATFORM_NAME}") output_lines.append(
f" [DRY RUN] Would package to {pkg_ext} file for {adaptor.PLATFORM_NAME}"
)
workflow_state["zip_path"] = pkg_file workflow_state["zip_path"] = pkg_file
output_lines.append("") output_lines.append("")
@@ -725,7 +761,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
output_lines.append(" (No API key needed - markdown is export only)") output_lines.append(" (No API key needed - markdown is export only)")
output_lines.append(f" Package created: {workflow_state['zip_path']}") output_lines.append(f" Package created: {workflow_state['zip_path']}")
else: else:
output_lines.append(f" [DRY RUN] Would upload to {adaptor.PLATFORM_NAME} (if API key set)") output_lines.append(
f" [DRY RUN] Would upload to {adaptor.PLATFORM_NAME} (if API key set)"
)
output_lines.append("") output_lines.append("")
@@ -757,12 +795,16 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
output_lines.append(" Go to https://aistudio.google.com/ to use it") output_lines.append(" Go to https://aistudio.google.com/ to use it")
elif target == "openai": elif target == "openai":
output_lines.append("🎉 Your assistant is now available in OpenAI!") output_lines.append("🎉 Your assistant is now available in OpenAI!")
output_lines.append(" Go to https://platform.openai.com/assistants/ to use it") output_lines.append(
" Go to https://platform.openai.com/assistants/ to use it"
)
elif auto_upload: elif auto_upload:
output_lines.append("📝 Manual upload required (see instructions above)") output_lines.append("📝 Manual upload required (see instructions above)")
else: else:
output_lines.append("📤 To upload:") output_lines.append("📤 To upload:")
output_lines.append(f" skill-seekers upload {workflow_state['zip_path']} --target {target}") output_lines.append(
f" skill-seekers upload {workflow_state['zip_path']} --target {target}"
)
else: else:
output_lines.append("This was a dry run. No actions were taken.") output_lines.append("This was a dry run. No actions were taken.")
output_lines.append("") output_lines.append("")

View File

@@ -140,7 +140,13 @@ async def estimate_pages_tool(args: dict) -> list[TextContent]:
timeout = max(300, max_discovery // 2) # Minimum 5 minutes timeout = max(300, max_discovery // 2) # Minimum 5 minutes
# Run estimate_pages.py # Run estimate_pages.py
cmd = [sys.executable, str(CLI_DIR / "estimate_pages.py"), config_path, "--max-discovery", str(max_discovery)] cmd = [
sys.executable,
str(CLI_DIR / "estimate_pages.py"),
config_path,
"--max-discovery",
str(max_discovery),
]
progress_msg = "🔄 Estimating page count...\n" progress_msg = "🔄 Estimating page count...\n"
progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n" progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
@@ -328,7 +334,11 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
cmd.extend(["--from-json", from_json]) cmd.extend(["--from-json", from_json])
else: else:
return [TextContent(type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json")] return [
TextContent(
type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json"
)
]
# Run pdf_scraper.py with streaming (can take a while) # Run pdf_scraper.py with streaming (can take a while)
timeout = 600 # 10 minutes for PDF extraction timeout = 600 # 10 minutes for PDF extraction
@@ -529,7 +539,11 @@ async def detect_patterns_tool(args: dict) -> list[TextContent]:
directory = args.get("directory") directory = args.get("directory")
if not file_path and not directory: if not file_path and not directory:
return [TextContent(type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter")] return [
TextContent(
type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter"
)
]
output = args.get("output", "") output = args.get("output", "")
depth = args.get("depth", "deep") depth = args.get("depth", "deep")
@@ -604,7 +618,11 @@ async def extract_test_examples_tool(args: dict) -> list[TextContent]:
directory = args.get("directory") directory = args.get("directory")
if not file_path and not directory: if not file_path and not directory:
return [TextContent(type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter")] return [
TextContent(
type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter"
)
]
language = args.get("language", "") language = args.get("language", "")
min_confidence = args.get("min_confidence", 0.5) min_confidence = args.get("min_confidence", 0.5)
@@ -688,7 +706,12 @@ async def build_how_to_guides_tool(args: dict) -> list[TextContent]:
""" """
input_file = args.get("input") input_file = args.get("input")
if not input_file: if not input_file:
return [TextContent(type="text", text="❌ Error: input parameter is required (path to test_examples.json)")] return [
TextContent(
type="text",
text="❌ Error: input parameter is required (path to test_examples.json)",
)
]
output = args.get("output", "output/codebase/tutorials") output = args.get("output", "output/codebase/tutorials")
group_by = args.get("group_by", "ai-tutorial-group") group_by = args.get("group_by", "ai-tutorial-group")

View File

@@ -76,7 +76,12 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
# MODE 1: Named Source (highest priority) # MODE 1: Named Source (highest priority)
if source_name: if source_name:
if not config_name: if not config_name:
return [TextContent(type="text", text="❌ Error: config_name is required when using source parameter")] return [
TextContent(
type="text",
text="❌ Error: config_name is required when using source parameter",
)
]
# Get source from registry # Get source from registry
source_manager = SourceManager() source_manager = SourceManager()
@@ -97,7 +102,11 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
git_repo = GitConfigRepo() git_repo = GitConfigRepo()
try: try:
repo_path = git_repo.clone_or_pull( repo_path = git_repo.clone_or_pull(
source_name=source_name, git_url=git_url, branch=branch, token=token, force_refresh=force_refresh source_name=source_name,
git_url=git_url,
branch=branch,
token=token,
force_refresh=force_refresh,
) )
except Exception as e: except Exception as e:
return [TextContent(type="text", text=f"❌ Git error: {str(e)}")] return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]
@@ -139,7 +148,12 @@ Next steps:
# MODE 2: Direct Git URL # MODE 2: Direct Git URL
elif git_url: elif git_url:
if not config_name: if not config_name:
return [TextContent(type="text", text="❌ Error: config_name is required when using git_url parameter")] return [
TextContent(
type="text",
text="❌ Error: config_name is required when using git_url parameter",
)
]
# Clone/pull repository # Clone/pull repository
git_repo = GitConfigRepo() git_repo = GitConfigRepo()
@@ -237,7 +251,9 @@ Next steps:
if tags: if tags:
result += f" Tags: {tags}\n" result += f" Tags: {tags}\n"
result += "\n💡 To download a config, use: fetch_config with config_name='<name>'\n" result += (
"\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
)
result += f"📚 API Docs: {API_BASE_URL}/docs\n" result += f"📚 API Docs: {API_BASE_URL}/docs\n"
return [TextContent(type="text", text=result)] return [TextContent(type="text", text=result)]
@@ -245,7 +261,10 @@ Next steps:
# Download specific config # Download specific config
if not config_name: if not config_name:
return [ return [
TextContent(type="text", text="❌ Error: Please provide config_name or set list_available=true") TextContent(
type="text",
text="❌ Error: Please provide config_name or set list_available=true",
)
] ]
# Get config details first # Get config details first
@@ -305,11 +324,14 @@ Next steps:
except httpx.HTTPError as e: except httpx.HTTPError as e:
return [ return [
TextContent( TextContent(
type="text", text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later." type="text",
text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later.",
) )
] ]
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
return [TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")] return [
TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")
]
except Exception as e: except Exception as e:
return [TextContent(type="text", text=f"❌ Error: {str(e)}")] return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
@@ -335,7 +357,10 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
from github import Github, GithubException from github import Github, GithubException
except ImportError: except ImportError:
return [ return [
TextContent(type="text", text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub") TextContent(
type="text",
text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub",
)
] ]
# Import config validator # Import config validator
@@ -359,7 +384,9 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
if config_path: if config_path:
config_file = Path(config_path) config_file = Path(config_path)
if not config_file.exists(): if not config_file.exists():
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")] return [
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
]
with open(config_file) as f: with open(config_file) as f:
config_data = json.load(f) config_data = json.load(f)
@@ -374,7 +401,11 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")] return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")]
else: else:
return [TextContent(type="text", text="❌ Error: Must provide either config_path or config_json")] return [
TextContent(
type="text", text="❌ Error: Must provide either config_path or config_json"
)
]
# Use ConfigValidator for comprehensive validation # Use ConfigValidator for comprehensive validation
if ConfigValidator is None: if ConfigValidator is None:
@@ -404,14 +435,20 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
if not is_unified: if not is_unified:
# Legacy config - check base_url # Legacy config - check base_url
base_url = config_data.get("base_url", "") base_url = config_data.get("base_url", "")
if base_url and not (base_url.startswith("http://") or base_url.startswith("https://")): if base_url and not (
raise ValueError(f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://") base_url.startswith("http://") or base_url.startswith("https://")
):
raise ValueError(
f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://"
)
else: else:
# Unified config - check URLs in sources # Unified config - check URLs in sources
for idx, source in enumerate(config_data.get("sources", [])): for idx, source in enumerate(config_data.get("sources", [])):
if source.get("type") == "documentation": if source.get("type") == "documentation":
source_url = source.get("base_url", "") source_url = source.get("base_url", "")
if source_url and not (source_url.startswith("http://") or source_url.startswith("https://")): if source_url and not (
source_url.startswith("http://") or source_url.startswith("https://")
):
raise ValueError( raise ValueError(
f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://" f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://"
) )
@@ -453,7 +490,10 @@ Please fix these issues and try again.
# For legacy configs, use name-based detection # For legacy configs, use name-based detection
name_lower = config_name.lower() name_lower = config_name.lower()
category = "other" category = "other"
if any(x in name_lower for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]): if any(
x in name_lower
for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]
):
category = "web-frameworks" category = "web-frameworks"
elif any(x in name_lower for x in ["godot", "unity", "unreal"]): elif any(x in name_lower for x in ["godot", "unity", "unreal"]):
category = "game-engines" category = "game-engines"
@@ -469,12 +509,16 @@ Please fix these issues and try again.
if "max_pages" not in config_data: if "max_pages" not in config_data:
warnings.append("⚠️ No max_pages set - will use default (100)") warnings.append("⚠️ No max_pages set - will use default (100)")
elif config_data.get("max_pages") in (None, -1): elif config_data.get("max_pages") in (None, -1):
warnings.append("⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours") warnings.append(
"⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours"
)
else: else:
# Unified config warnings # Unified config warnings
for src in config_data.get("sources", []): for src in config_data.get("sources", []):
if src.get("type") == "documentation" and "max_pages" not in src: if src.get("type") == "documentation" and "max_pages" not in src:
warnings.append("⚠️ No max_pages set for documentation source - will use default (100)") warnings.append(
"⚠️ No max_pages set for documentation source - will use default (100)"
)
elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1): elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1):
warnings.append("⚠️ Unlimited scraping enabled for documentation source") warnings.append("⚠️ Unlimited scraping enabled for documentation source")
@@ -529,7 +573,9 @@ Please fix these issues and try again.
# Create issue # Create issue
issue = repo.create_issue( issue = repo.create_issue(
title=f"[CONFIG] {config_name}", body=issue_body, labels=["config-submission", "needs-review"] title=f"[CONFIG] {config_name}",
body=issue_body,
labels=["config-submission", "needs-review"],
) )
result = f"""✅ Config submitted successfully! result = f"""✅ Config submitted successfully!

View File

@@ -183,7 +183,9 @@ async def generate_router(args: dict) -> list[TextContent]:
config_files = glob.glob(config_pattern) config_files = glob.glob(config_pattern)
if not config_files: if not config_files:
return [TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")] return [
TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")
]
# Run generate_router.py # Run generate_router.py
cmd = [ cmd = [

View File

@@ -282,7 +282,12 @@ Pass data to components:
def test_e2e_package_format_validation(self): def test_e2e_package_format_validation(self):
"""Test that each platform creates correct package format""" """Test that each platform creates correct package format"""
test_cases = [("claude", ".zip"), ("gemini", ".tar.gz"), ("openai", ".zip"), ("markdown", ".zip")] test_cases = [
("claude", ".zip"),
("gemini", ".tar.gz"),
("openai", ".zip"),
("markdown", ".zip"),
]
for platform, expected_ext in test_cases: for platform, expected_ext in test_cases:
adaptor = get_adaptor(platform) adaptor = get_adaptor(platform)
@@ -290,9 +295,13 @@ Pass data to components:
# Verify extension # Verify extension
if expected_ext == ".tar.gz": if expected_ext == ".tar.gz":
self.assertTrue(str(package_path).endswith(".tar.gz"), f"{platform} should create .tar.gz file") self.assertTrue(
str(package_path).endswith(".tar.gz"), f"{platform} should create .tar.gz file"
)
else: else:
self.assertTrue(str(package_path).endswith(".zip"), f"{platform} should create .zip file") self.assertTrue(
str(package_path).endswith(".zip"), f"{platform} should create .zip file"
)
def test_e2e_package_filename_convention(self): def test_e2e_package_filename_convention(self):
"""Test that package filenames follow convention""" """Test that package filenames follow convention"""
@@ -308,7 +317,9 @@ Pass data to components:
package_path = adaptor.package(self.skill_dir, self.output_dir) package_path = adaptor.package(self.skill_dir, self.output_dir)
# Verify filename # Verify filename
self.assertEqual(package_path.name, expected_name, f"{platform} package filename incorrect") self.assertEqual(
package_path.name, expected_name, f"{platform} package filename incorrect"
)
def test_e2e_all_platforms_preserve_references(self): def test_e2e_all_platforms_preserve_references(self):
"""Test that all platforms preserve reference files""" """Test that all platforms preserve reference files"""
@@ -324,7 +335,8 @@ Pass data to components:
names = tar.getnames() names = tar.getnames()
for ref_file in ref_files: for ref_file in ref_files:
self.assertTrue( self.assertTrue(
any(ref_file in name for name in names), f"{platform}: {ref_file} not found in package" any(ref_file in name for name in names),
f"{platform}: {ref_file} not found in package",
) )
else: else:
with zipfile.ZipFile(package_path, "r") as zf: with zipfile.ZipFile(package_path, "r") as zf:
@@ -338,7 +350,8 @@ Pass data to components:
) )
else: else:
self.assertTrue( self.assertTrue(
any(ref_file in name for name in names), f"{platform}: {ref_file} not found in package" any(ref_file in name for name in names),
f"{platform}: {ref_file} not found in package",
) )
def test_e2e_metadata_consistency(self): def test_e2e_metadata_consistency(self):
@@ -357,7 +370,9 @@ Pass data to components:
metadata = json.loads(metadata_file.read().decode("utf-8")) metadata = json.loads(metadata_file.read().decode("utf-8"))
else: else:
with zipfile.ZipFile(package_path, "r") as zf: with zipfile.ZipFile(package_path, "r") as zf:
metadata_filename = f"{platform}_metadata.json" if platform == "openai" else "metadata.json" metadata_filename = (
f"{platform}_metadata.json" if platform == "openai" else "metadata.json"
)
metadata_content = zf.read(metadata_filename).decode("utf-8") metadata_content = zf.read(metadata_filename).decode("utf-8")
metadata = json.loads(metadata_content) metadata = json.loads(metadata_content)
@@ -467,7 +482,9 @@ class TestAdaptorsWorkflowIntegration(unittest.TestCase):
# Should respect custom path # Should respect custom path
self.assertTrue(package_path.exists()) self.assertTrue(package_path.exists())
self.assertTrue("my-package" in package_path.name or package_path.parent.name == "custom") self.assertTrue(
"my-package" in package_path.name or package_path.parent.name == "custom"
)
def test_workflow_api_key_validation(self): def test_workflow_api_key_validation(self):
"""Test API key validation for each platform""" """Test API key validation for each platform"""
@@ -485,7 +502,9 @@ class TestAdaptorsWorkflowIntegration(unittest.TestCase):
for platform, api_key, expected in test_cases: for platform, api_key, expected in test_cases:
adaptor = get_adaptor(platform) adaptor = get_adaptor(platform)
result = adaptor.validate_api_key(api_key) result = adaptor.validate_api_key(api_key)
self.assertEqual(result, expected, f"{platform}: validate_api_key('{api_key}') should be {expected}") self.assertEqual(
result, expected, f"{platform}: validate_api_key('{api_key}') should be {expected}"
)
class TestAdaptorsErrorHandling(unittest.TestCase): class TestAdaptorsErrorHandling(unittest.TestCase):

View File

@@ -58,7 +58,9 @@ class TestClaudeAdaptor(unittest.TestCase):
(skill_dir / "references").mkdir() (skill_dir / "references").mkdir()
(skill_dir / "references" / "test.md").write_text("# Test content") (skill_dir / "references" / "test.md").write_text("# Test content")
metadata = SkillMetadata(name="test-skill", description="Test skill description", version="1.0.0") metadata = SkillMetadata(
name="test-skill", description="Test skill description", version="1.0.0"
)
formatted = self.adaptor.format_skill_md(skill_dir, metadata) formatted = self.adaptor.format_skill_md(skill_dir, metadata)
@@ -221,7 +223,9 @@ This is existing skill content that should be preserved.
self.assertTrue(package_path.exists()) self.assertTrue(package_path.exists())
# Should respect custom naming if provided # Should respect custom naming if provided
self.assertTrue("my-package" in package_path.name or package_path.parent.name == "custom") self.assertTrue(
"my-package" in package_path.name or package_path.parent.name == "custom"
)
def test_package_to_directory(self): def test_package_to_directory(self):
"""Test packaging to directory (should auto-name)""" """Test packaging to directory (should auto-name)"""

View File

@@ -95,7 +95,9 @@ class TestAPIReferenceBuilder(unittest.TestCase):
"functions": [ "functions": [
{ {
"name": "calculate_sum", "name": "calculate_sum",
"parameters": [{"name": "numbers", "type_hint": "list", "default": None}], "parameters": [
{"name": "numbers", "type_hint": "list", "default": None}
],
"return_type": "int", "return_type": "int",
"docstring": "Calculate sum of numbers.", "docstring": "Calculate sum of numbers.",
"is_async": False, "is_async": False,
@@ -166,7 +168,14 @@ class TestAPIReferenceBuilder(unittest.TestCase):
{ {
"file": "module.py", "file": "module.py",
"language": "Python", "language": "Python",
"classes": [{"name": "TestClass", "docstring": "Test class.", "base_classes": [], "methods": []}], "classes": [
{
"name": "TestClass",
"docstring": "Test class.",
"base_classes": [],
"methods": [],
}
],
"functions": [ "functions": [
{ {
"name": "test_func", "name": "test_func",

View File

@@ -192,9 +192,15 @@ How to use async tools.
with ( with (
patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo), patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
patch.object( patch.object(
GitHubThreeStreamFetcher, "fetch_github_metadata", return_value=mock_github_api_data["metadata"] GitHubThreeStreamFetcher,
"fetch_github_metadata",
return_value=mock_github_api_data["metadata"],
),
patch.object(
GitHubThreeStreamFetcher,
"fetch_issues",
return_value=mock_github_api_data["issues"],
), ),
patch.object(GitHubThreeStreamFetcher, "fetch_issues", return_value=mock_github_api_data["issues"]),
): ):
fetcher = GitHubThreeStreamFetcher("https://github.com/jlowin/fastmcp") fetcher = GitHubThreeStreamFetcher("https://github.com/jlowin/fastmcp")
three_streams = fetcher.fetch() three_streams = fetcher.fetch()
@@ -227,10 +233,18 @@ How to use async tools.
with ( with (
patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo), patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
patch.object( patch.object(
GitHubThreeStreamFetcher, "fetch_github_metadata", return_value=mock_github_api_data["metadata"] GitHubThreeStreamFetcher,
"fetch_github_metadata",
return_value=mock_github_api_data["metadata"],
), ),
patch.object(GitHubThreeStreamFetcher, "fetch_issues", return_value=mock_github_api_data["issues"]), patch.object(
patch("skill_seekers.cli.unified_codebase_analyzer.UnifiedCodebaseAnalyzer.c3x_analysis") as mock_c3x, GitHubThreeStreamFetcher,
"fetch_issues",
return_value=mock_github_api_data["issues"],
),
patch(
"skill_seekers.cli.unified_codebase_analyzer.UnifiedCodebaseAnalyzer.c3x_analysis"
) as mock_c3x,
): ):
# Mock C3.x analysis to return sample data # Mock C3.x analysis to return sample data
mock_c3x.return_value = { mock_c3x.return_value = {
@@ -247,7 +261,9 @@ How to use async tools.
"c3_2_examples_count": 2, "c3_2_examples_count": 2,
"c3_3_guides": [{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}], "c3_3_guides": [{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}],
"c3_4_configs": [], "c3_4_configs": [],
"c3_7_architecture": [{"pattern": "Service Layer", "description": "OAuth provider abstraction"}], "c3_7_architecture": [
{"pattern": "Service Layer", "description": "OAuth provider abstraction"}
],
} }
analyzer = UnifiedCodebaseAnalyzer() analyzer = UnifiedCodebaseAnalyzer()
@@ -316,7 +332,13 @@ How to use async tools.
"description": "Python framework for MCP servers", "description": "Python framework for MCP servers",
}, },
common_problems=[ common_problems=[
{"number": 42, "title": "OAuth setup fails", "labels": ["oauth"], "comments": 15, "state": "open"}, {
"number": 42,
"title": "OAuth setup fails",
"labels": ["oauth"],
"comments": 15,
"state": "open",
},
{ {
"number": 38, "number": 38,
"title": "Async tools not working", "title": "Async tools not working",
@@ -344,7 +366,9 @@ How to use async tools.
# Generate router # Generate router
generator = RouterGenerator( generator = RouterGenerator(
config_paths=[str(config1), str(config2)], router_name="fastmcp", github_streams=mock_streams config_paths=[str(config1), str(config2)],
router_name="fastmcp",
github_streams=mock_streams,
) )
skill_md = generator.generate_skill_md() skill_md = generator.generate_skill_md()
@@ -536,15 +560,21 @@ class TestScenario2MultiSource:
source1_data = {"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]} source1_data = {"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]}
# Mock source 2 (GitHub C3.x) # Mock source 2 (GitHub C3.x)
source2_data = {"api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}]} source2_data = {
"api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}]
}
# Mock GitHub streams # Mock GitHub streams
github_streams = ThreeStreamData( github_streams = ThreeStreamData(
code_stream=CodeStream(directory=Path("/tmp"), files=[]), code_stream=CodeStream(directory=Path("/tmp"), files=[]),
docs_stream=DocsStream(readme="Use client_id and client_secret", contributing=None, docs_files=[]), docs_stream=DocsStream(
readme="Use client_id and client_secret", contributing=None, docs_files=[]
),
insights_stream=InsightsStream( insights_stream=InsightsStream(
metadata={"stars": 1000}, metadata={"stars": 1000},
common_problems=[{"number": 42, "title": "OAuth parameter confusion", "labels": ["oauth"]}], common_problems=[
{"number": 42, "title": "OAuth parameter confusion", "labels": ["oauth"]}
],
known_solutions=[], known_solutions=[],
top_labels=[], top_labels=[],
), ),
@@ -633,7 +663,9 @@ def test_connection():
"""Test basic analysis of local codebase.""" """Test basic analysis of local codebase."""
analyzer = UnifiedCodebaseAnalyzer() analyzer = UnifiedCodebaseAnalyzer()
result = analyzer.analyze(source=str(local_codebase), depth="basic", fetch_github_metadata=False) result = analyzer.analyze(
source=str(local_codebase), depth="basic", fetch_github_metadata=False
)
# Verify result # Verify result
assert isinstance(result, AnalysisResult) assert isinstance(result, AnalysisResult)
@@ -653,7 +685,9 @@ def test_connection():
"""Test C3.x analysis of local codebase.""" """Test C3.x analysis of local codebase."""
analyzer = UnifiedCodebaseAnalyzer() analyzer = UnifiedCodebaseAnalyzer()
with patch("skill_seekers.cli.unified_codebase_analyzer.UnifiedCodebaseAnalyzer.c3x_analysis") as mock_c3x: with patch(
"skill_seekers.cli.unified_codebase_analyzer.UnifiedCodebaseAnalyzer.c3x_analysis"
) as mock_c3x:
# Mock C3.x to return sample data # Mock C3.x to return sample data
mock_c3x.return_value = { mock_c3x.return_value = {
"files": ["database.py", "api.py"], "files": ["database.py", "api.py"],
@@ -666,7 +700,9 @@ def test_connection():
"c3_7_architecture": [], "c3_7_architecture": [],
} }
result = analyzer.analyze(source=str(local_codebase), depth="c3x", fetch_github_metadata=False) result = analyzer.analyze(
source=str(local_codebase), depth="c3x", fetch_github_metadata=False
)
# Verify result # Verify result
assert result.source_type == "local" assert result.source_type == "local"
@@ -814,7 +850,12 @@ Based on analysis of GitHub issues:
github_overhead += 1 github_overhead += 1
continue continue
if in_repo_info: if in_repo_info:
if line.startswith("**") or "github.com" in line or "" in line or "FastMCP is" in line: if (
line.startswith("**")
or "github.com" in line
or "" in line
or "FastMCP is" in line
):
github_overhead += 1 github_overhead += 1
if line.startswith("##"): if line.startswith("##"):
in_repo_info = False in_repo_info = False
@@ -894,7 +935,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
# Check minimum 3 code examples # Check minimum 3 code examples
code_blocks = sub_skill_md.count("```") code_blocks = sub_skill_md.count("```")
assert code_blocks >= 6, f"Need at least 3 code examples (6 markers), found {code_blocks // 2}" assert code_blocks >= 6, (
f"Need at least 3 code examples (6 markers), found {code_blocks // 2}"
)
# Check language tags # Check language tags
assert "```python" in sub_skill_md, "Code blocks must have language tags" assert "```python" in sub_skill_md, "Code blocks must have language tags"
@@ -909,7 +952,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
# Check solution indicators for closed issues # Check solution indicators for closed issues
if "closed" in sub_skill_md.lower(): if "closed" in sub_skill_md.lower():
assert "" in sub_skill_md or "Solution" in sub_skill_md, "Closed issues should indicate solution found" assert "" in sub_skill_md or "Solution" in sub_skill_md, (
"Closed issues should indicate solution found"
)
class TestTokenEfficiencyCalculation: class TestTokenEfficiencyCalculation:
@@ -946,7 +991,9 @@ class TestTokenEfficiencyCalculation:
# With selective loading and caching, achieve 35-40% # With selective loading and caching, achieve 35-40%
# Even conservative estimate shows 29.5%, actual usage patterns show 35-40% # Even conservative estimate shows 29.5%, actual usage patterns show 35-40%
assert reduction_percent >= 29, f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)" assert reduction_percent >= 29, (
f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)"
)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -92,7 +92,11 @@ class TestAsyncScrapeMethods(unittest.TestCase):
def test_scrape_page_async_exists(self): def test_scrape_page_async_exists(self):
"""Test scrape_page_async method exists""" """Test scrape_page_async method exists"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": {"main_content": "article"}} config = {
"name": "test",
"base_url": "https://example.com/",
"selectors": {"main_content": "article"},
}
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
try: try:
@@ -105,7 +109,11 @@ class TestAsyncScrapeMethods(unittest.TestCase):
def test_scrape_all_async_exists(self): def test_scrape_all_async_exists(self):
"""Test scrape_all_async method exists""" """Test scrape_all_async method exists"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": {"main_content": "article"}} config = {
"name": "test",
"base_url": "https://example.com/",
"selectors": {"main_content": "article"},
}
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
try: try:
@@ -144,7 +152,9 @@ class TestAsyncRouting(unittest.TestCase):
converter = DocToSkillConverter(config, dry_run=True) converter = DocToSkillConverter(config, dry_run=True)
# Mock scrape_all_async to verify it gets called # Mock scrape_all_async to verify it gets called
with patch.object(converter, "scrape_all_async", new_callable=AsyncMock) as mock_async: with patch.object(
converter, "scrape_all_async", new_callable=AsyncMock
) as mock_async:
converter.scrape_all() converter.scrape_all()
# Verify async version was called # Verify async version was called
mock_async.assert_called_once() mock_async.assert_called_once()
@@ -167,7 +177,9 @@ class TestAsyncRouting(unittest.TestCase):
converter = DocToSkillConverter(config, dry_run=True) converter = DocToSkillConverter(config, dry_run=True)
# Mock scrape_all_async to verify it does NOT get called # Mock scrape_all_async to verify it does NOT get called
with patch.object(converter, "scrape_all_async", new_callable=AsyncMock) as mock_async: with patch.object(
converter, "scrape_all_async", new_callable=AsyncMock
) as mock_async:
with patch.object(converter, "_try_llms_txt", return_value=False): with patch.object(converter, "_try_llms_txt", return_value=False):
converter.scrape_all() converter.scrape_all()
# Verify async version was NOT called # Verify async version was NOT called
@@ -249,7 +261,9 @@ class TestAsyncErrorHandling(unittest.TestCase):
# Mock client.get to raise exception # Mock client.get to raise exception
with patch.object(client, "get", side_effect=httpx.HTTPError("Test error")): with patch.object(client, "get", side_effect=httpx.HTTPError("Test error")):
# Should not raise exception, just log error # Should not raise exception, just log error
await converter.scrape_page_async("https://example.com/test", semaphore, client) await converter.scrape_page_async(
"https://example.com/test", semaphore, client
)
# Run async test # Run async test
asyncio.run(run_test()) asyncio.run(run_test())

View File

@@ -38,18 +38,16 @@ def project_root():
@pytest.fixture @pytest.fixture
def run_bootstrap(project_root): def run_bootstrap(project_root):
"""Execute bootstrap script and return result""" """Execute bootstrap script and return result"""
def _run(timeout=600): def _run(timeout=600):
script = project_root / "scripts" / "bootstrap_skill.sh" script = project_root / "scripts" / "bootstrap_skill.sh"
result = subprocess.run( result = subprocess.run(
["bash", str(script)], ["bash", str(script)], cwd=project_root, capture_output=True, text=True, timeout=timeout
cwd=project_root,
capture_output=True,
text=True,
timeout=timeout
) )
return result return result
return _run return _run
@@ -95,7 +93,7 @@ class TestBootstrapSkillE2E:
assert content.startswith("---"), "Missing frontmatter start" assert content.startswith("---"), "Missing frontmatter start"
# Find closing delimiter # Find closing delimiter
lines = content.split('\n') lines = content.split("\n")
closing_found = False closing_found = False
for i, line in enumerate(lines[1:], 1): for i, line in enumerate(lines[1:], 1):
if line.strip() == "---": if line.strip() == "---":
@@ -129,11 +127,7 @@ class TestBootstrapSkillE2E:
# Create venv # Create venv
venv_path = tmp_path / "test_venv" venv_path = tmp_path / "test_venv"
subprocess.run( subprocess.run([sys.executable, "-m", "venv", str(venv_path)], check=True, timeout=60)
[sys.executable, "-m", "venv", str(venv_path)],
check=True,
timeout=60
)
# Install skill in venv # Install skill in venv
pip_path = venv_path / "bin" / "pip" pip_path = venv_path / "bin" / "pip"
@@ -142,7 +136,7 @@ class TestBootstrapSkillE2E:
cwd=output_skill_dir.parent.parent, cwd=output_skill_dir.parent.parent,
capture_output=True, capture_output=True,
text=True, text=True,
timeout=120 timeout=120,
) )
# Should install successfully # Should install successfully
@@ -156,13 +150,13 @@ class TestBootstrapSkillE2E:
# Try to package with claude adaptor (simplest) # Try to package with claude adaptor (simplest)
from skill_seekers.cli.adaptors import get_adaptor from skill_seekers.cli.adaptors import get_adaptor
adaptor = get_adaptor('claude') adaptor = get_adaptor("claude")
# Should be able to package without errors # Should be able to package without errors
try: try:
package_path = adaptor.package( package_path = adaptor.package(
skill_dir=output_skill_dir, # Path object, not str skill_dir=output_skill_dir, # Path object, not str
output_path=tmp_path # Path object, not str output_path=tmp_path, # Path object, not str
) )
assert Path(package_path).exists(), "Package not created" assert Path(package_path).exists(), "Package not created"

View File

@@ -111,7 +111,10 @@ class TestC3Integration:
} }
], ],
"ai_enhancements": { "ai_enhancements": {
"overall_insights": {"security_issues_found": 1, "recommended_actions": ["Move secrets to .env"]} "overall_insights": {
"security_issues_found": 1,
"recommended_actions": ["Move secrets to .env"],
}
}, },
}, },
"architecture": { "architecture": {
@@ -120,7 +123,11 @@ class TestC3Integration:
"pattern_name": "MVC", "pattern_name": "MVC",
"confidence": 0.89, "confidence": 0.89,
"framework": "Flask", "framework": "Flask",
"evidence": ["models/ directory", "views/ directory", "controllers/ directory"], "evidence": [
"models/ directory",
"views/ directory",
"controllers/ directory",
],
} }
], ],
"frameworks_detected": ["Flask", "SQLAlchemy"], "frameworks_detected": ["Flask", "SQLAlchemy"],
@@ -173,7 +180,9 @@ class TestC3Integration:
"""Test ARCHITECTURE.md is generated with all 8 sections.""" """Test ARCHITECTURE.md is generated with all 8 sections."""
# Create skill builder with C3.x data (multi-source list format) # Create skill builder with C3.x data (multi-source list format)
github_data = {"readme": "Test README", "c3_analysis": mock_c3_data} github_data = {"readme": "Test README", "c3_analysis": mock_c3_data}
scraped_data = {"github": [{"repo": "test/repo", "repo_id": "test_repo", "idx": 0, "data": github_data}]} scraped_data = {
"github": [{"repo": "test/repo", "repo_id": "test_repo", "idx": 0, "data": github_data}]
}
builder = UnifiedSkillBuilder(mock_config, scraped_data) builder = UnifiedSkillBuilder(mock_config, scraped_data)
builder.skill_dir = temp_dir builder.skill_dir = temp_dir
@@ -212,7 +221,9 @@ class TestC3Integration:
"""Test correct C3.x reference directory structure is created.""" """Test correct C3.x reference directory structure is created."""
# Create skill builder with C3.x data (multi-source list format) # Create skill builder with C3.x data (multi-source list format)
github_data = {"readme": "Test README", "c3_analysis": mock_c3_data} github_data = {"readme": "Test README", "c3_analysis": mock_c3_data}
scraped_data = {"github": [{"repo": "test/repo", "repo_id": "test_repo", "idx": 0, "data": github_data}]} scraped_data = {
"github": [{"repo": "test/repo", "repo_id": "test_repo", "idx": 0, "data": github_data}]
}
builder = UnifiedSkillBuilder(mock_config, scraped_data) builder = UnifiedSkillBuilder(mock_config, scraped_data)
builder.skill_dir = temp_dir builder.skill_dir = temp_dir
@@ -261,7 +272,11 @@ class TestC3Integration:
# Mock GitHubScraper (correct module path for import) # Mock GitHubScraper (correct module path for import)
with patch("skill_seekers.cli.github_scraper.GitHubScraper") as mock_github: with patch("skill_seekers.cli.github_scraper.GitHubScraper") as mock_github:
mock_github.return_value.scrape.return_value = {"readme": "Test README", "issues": [], "releases": []} mock_github.return_value.scrape.return_value = {
"readme": "Test README",
"issues": [],
"releases": [],
}
scraper = UnifiedScraper(config_path) scraper = UnifiedScraper(config_path)
@@ -278,7 +293,14 @@ class TestC3Integration:
config = { config = {
"name": "test", "name": "test",
"description": "Test", "description": "Test",
"sources": [{"type": "github", "repo": "test/repo", "enable_codebase_analysis": True, "ai_mode": "auto"}], "sources": [
{
"type": "github",
"repo": "test/repo",
"enable_codebase_analysis": True,
"ai_mode": "auto",
}
],
} }
# Save config # Save config

View File

@@ -19,7 +19,9 @@ class TestModernCLICommands(unittest.TestCase):
def test_doc_scraper_uses_modern_commands(self): def test_doc_scraper_uses_modern_commands(self):
"""Test doc_scraper.py uses skill-seekers commands""" """Test doc_scraper.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "doc_scraper.py" script_path = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "doc_scraper.py"
)
with open(script_path) as f: with open(script_path) as f:
content = f.read() content = f.read()
@@ -32,7 +34,13 @@ class TestModernCLICommands(unittest.TestCase):
def test_enhance_skill_local_uses_modern_commands(self): def test_enhance_skill_local_uses_modern_commands(self):
"""Test enhance_skill_local.py uses skill-seekers commands""" """Test enhance_skill_local.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "enhance_skill_local.py" script_path = (
Path(__file__).parent.parent
/ "src"
/ "skill_seekers"
/ "cli"
/ "enhance_skill_local.py"
)
with open(script_path) as f: with open(script_path) as f:
content = f.read() content = f.read()
@@ -45,7 +53,9 @@ class TestModernCLICommands(unittest.TestCase):
def test_estimate_pages_uses_modern_commands(self): def test_estimate_pages_uses_modern_commands(self):
"""Test estimate_pages.py uses skill-seekers commands""" """Test estimate_pages.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "estimate_pages.py" script_path = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "estimate_pages.py"
)
with open(script_path) as f: with open(script_path) as f:
content = f.read() content = f.read()
@@ -58,7 +68,9 @@ class TestModernCLICommands(unittest.TestCase):
def test_package_skill_uses_modern_commands(self): def test_package_skill_uses_modern_commands(self):
"""Test package_skill.py uses skill-seekers commands""" """Test package_skill.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "package_skill.py" script_path = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "package_skill.py"
)
with open(script_path) as f: with open(script_path) as f:
content = f.read() content = f.read()
@@ -71,7 +83,9 @@ class TestModernCLICommands(unittest.TestCase):
def test_github_scraper_uses_modern_commands(self): def test_github_scraper_uses_modern_commands(self):
"""Test github_scraper.py uses skill-seekers commands""" """Test github_scraper.py uses skill-seekers commands"""
script_path = Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "github_scraper.py" script_path = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "cli" / "github_scraper.py"
)
with open(script_path) as f: with open(script_path) as f:
content = f.read() content = f.read()
@@ -89,10 +103,16 @@ class TestUnifiedCLIEntryPoints(unittest.TestCase):
def test_main_cli_help_output(self): def test_main_cli_help_output(self):
"""Test skill-seekers --help works""" """Test skill-seekers --help works"""
try: try:
result = subprocess.run(["skill-seekers", "--help"], capture_output=True, text=True, timeout=5) result = subprocess.run(
["skill-seekers", "--help"], capture_output=True, text=True, timeout=5
)
# Should return successfully # Should return successfully
self.assertIn(result.returncode, [0, 2], f"skill-seekers --help failed with code {result.returncode}") self.assertIn(
result.returncode,
[0, 2],
f"skill-seekers --help failed with code {result.returncode}",
)
# Should show subcommands # Should show subcommands
output = result.stdout + result.stderr output = result.stdout + result.stderr
@@ -107,14 +127,18 @@ class TestUnifiedCLIEntryPoints(unittest.TestCase):
def test_main_cli_version_output(self): def test_main_cli_version_output(self):
"""Test skill-seekers --version works""" """Test skill-seekers --version works"""
try: try:
result = subprocess.run(["skill-seekers", "--version"], capture_output=True, text=True, timeout=5) result = subprocess.run(
["skill-seekers", "--version"], capture_output=True, text=True, timeout=5
)
# Should return successfully # Should return successfully
self.assertEqual(result.returncode, 0, f"skill-seekers --version failed: {result.stderr}") self.assertEqual(
result.returncode, 0, f"skill-seekers --version failed: {result.stderr}"
)
# Should show version # Should show version
output = result.stdout + result.stderr output = result.stdout + result.stderr
self.assertIn('2.7.0', output) self.assertIn("2.7.0", output)
except FileNotFoundError: except FileNotFoundError:
# If skill-seekers is not installed, skip this test # If skill-seekers is not installed, skip this test
@@ -140,7 +164,9 @@ class TestNoHardcodedPaths(unittest.TestCase):
for hardcoded_path in hardcoded_paths: for hardcoded_path in hardcoded_paths:
self.assertNotIn( self.assertNotIn(
hardcoded_path, content, f"{script_path.name} contains hardcoded path: {hardcoded_path}" hardcoded_path,
content,
f"{script_path.name} contains hardcoded path: {hardcoded_path}",
) )

View File

@@ -173,7 +173,10 @@ API_KEY=secret123
PORT=8000 PORT=8000
""" """
config_file = ConfigFile( config_file = ConfigFile(
file_path=str(Path(self.temp_dir) / ".env"), relative_path=".env", config_type="env", purpose="unknown" file_path=str(Path(self.temp_dir) / ".env"),
relative_path=".env",
config_type="env",
purpose="unknown",
) )
file_path = Path(self.temp_dir) / ".env" file_path = Path(self.temp_dir) / ".env"
@@ -313,7 +316,8 @@ endpoint = "https://api.example.com"
# Check if parsing failed due to missing toml/tomli # Check if parsing failed due to missing toml/tomli
if config_file.parse_errors and ( if config_file.parse_errors and (
"toml" in str(config_file.parse_errors).lower() and "not installed" in str(config_file.parse_errors) "toml" in str(config_file.parse_errors).lower()
and "not installed" in str(config_file.parse_errors)
): ):
self.skipTest("toml/tomli not installed") self.skipTest("toml/tomli not installed")
@@ -337,7 +341,11 @@ class TestConfigPatternDetector(unittest.TestCase):
] ]
config_file = ConfigFile( config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
) )
patterns = self.detector.detect_patterns(config_file) patterns = self.detector.detect_patterns(config_file)
@@ -353,7 +361,11 @@ class TestConfigPatternDetector(unittest.TestCase):
] ]
config_file = ConfigFile( config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
) )
patterns = self.detector.detect_patterns(config_file) patterns = self.detector.detect_patterns(config_file)
@@ -369,7 +381,11 @@ class TestConfigPatternDetector(unittest.TestCase):
] ]
config_file = ConfigFile( config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
) )
patterns = self.detector.detect_patterns(config_file) patterns = self.detector.detect_patterns(config_file)
@@ -385,7 +401,11 @@ class TestConfigPatternDetector(unittest.TestCase):
] ]
config_file = ConfigFile( config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
) )
patterns = self.detector.detect_patterns(config_file) patterns = self.detector.detect_patterns(config_file)
@@ -402,7 +422,11 @@ class TestConfigPatternDetector(unittest.TestCase):
] ]
config_file = ConfigFile( config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
) )
patterns = self.detector.detect_patterns(config_file) patterns = self.detector.detect_patterns(config_file)
@@ -418,7 +442,11 @@ class TestConfigPatternDetector(unittest.TestCase):
] ]
config_file = ConfigFile( config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
) )
patterns = self.detector.detect_patterns(config_file) patterns = self.detector.detect_patterns(config_file)
@@ -434,7 +462,11 @@ class TestConfigPatternDetector(unittest.TestCase):
] ]
config_file = ConfigFile( config_file = ConfigFile(
file_path="test.json", relative_path="test.json", config_type="json", purpose="unknown", settings=settings file_path="test.json",
relative_path="test.json",
config_type="json",
purpose="unknown",
settings=settings,
) )
patterns = self.detector.detect_patterns(config_file) patterns = self.detector.detect_patterns(config_file)

View File

@@ -30,7 +30,11 @@ class TestConfigValidation(unittest.TestCase):
"name": "godot", "name": "godot",
"base_url": "https://docs.godotengine.org/en/stable/", "base_url": "https://docs.godotengine.org/en/stable/",
"description": "Godot Engine documentation", "description": "Godot Engine documentation",
"selectors": {"main_content": 'div[role="main"]', "title": "title", "code_blocks": "pre code"}, "selectors": {
"main_content": 'div[role="main"]',
"title": "title",
"code_blocks": "pre code",
},
"url_patterns": {"include": ["/guide/", "/api/"], "exclude": ["/blog/"]}, "url_patterns": {"include": ["/guide/", "/api/"], "exclude": ["/blog/"]},
"categories": {"getting_started": ["intro", "tutorial"], "api": ["api", "reference"]}, "categories": {"getting_started": ["intro", "tutorial"], "api": ["api", "reference"]},
"rate_limit": 0.5, "rate_limit": 0.5,
@@ -84,7 +88,9 @@ class TestConfigValidation(unittest.TestCase):
"""Test invalid selectors (not a dictionary)""" """Test invalid selectors (not a dictionary)"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": "invalid"} config = {"name": "test", "base_url": "https://example.com/", "selectors": "invalid"}
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertTrue(any("selectors" in error.lower() and "dictionary" in error.lower() for error in errors)) self.assertTrue(
any("selectors" in error.lower() and "dictionary" in error.lower() for error in errors)
)
def test_missing_recommended_selectors(self): def test_missing_recommended_selectors(self):
"""Test warning for missing recommended selectors""" """Test warning for missing recommended selectors"""
@@ -104,25 +110,44 @@ class TestConfigValidation(unittest.TestCase):
"""Test invalid url_patterns (not a dictionary)""" """Test invalid url_patterns (not a dictionary)"""
config = {"name": "test", "base_url": "https://example.com/", "url_patterns": []} config = {"name": "test", "base_url": "https://example.com/", "url_patterns": []}
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertTrue(any("url_patterns" in error.lower() and "dictionary" in error.lower() for error in errors)) self.assertTrue(
any(
"url_patterns" in error.lower() and "dictionary" in error.lower()
for error in errors
)
)
def test_invalid_url_patterns_include_not_list(self): def test_invalid_url_patterns_include_not_list(self):
"""Test invalid url_patterns.include (not a list)""" """Test invalid url_patterns.include (not a list)"""
config = {"name": "test", "base_url": "https://example.com/", "url_patterns": {"include": "not-a-list"}} config = {
"name": "test",
"base_url": "https://example.com/",
"url_patterns": {"include": "not-a-list"},
}
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertTrue(any("include" in error.lower() and "list" in error.lower() for error in errors)) self.assertTrue(
any("include" in error.lower() and "list" in error.lower() for error in errors)
)
def test_invalid_categories_not_dict(self): def test_invalid_categories_not_dict(self):
"""Test invalid categories (not a dictionary)""" """Test invalid categories (not a dictionary)"""
config = {"name": "test", "base_url": "https://example.com/", "categories": []} config = {"name": "test", "base_url": "https://example.com/", "categories": []}
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertTrue(any("categories" in error.lower() and "dictionary" in error.lower() for error in errors)) self.assertTrue(
any("categories" in error.lower() and "dictionary" in error.lower() for error in errors)
)
def test_invalid_category_keywords_not_list(self): def test_invalid_category_keywords_not_list(self):
"""Test invalid category keywords (not a list)""" """Test invalid category keywords (not a list)"""
config = {"name": "test", "base_url": "https://example.com/", "categories": {"getting_started": "not-a-list"}} config = {
"name": "test",
"base_url": "https://example.com/",
"categories": {"getting_started": "not-a-list"},
}
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertTrue(any("getting_started" in error.lower() and "list" in error.lower() for error in errors)) self.assertTrue(
any("getting_started" in error.lower() and "list" in error.lower() for error in errors)
)
def test_invalid_rate_limit_negative(self): def test_invalid_rate_limit_negative(self):
"""Test invalid rate_limit (negative)""" """Test invalid rate_limit (negative)"""
@@ -178,13 +203,23 @@ class TestConfigValidation(unittest.TestCase):
def test_invalid_start_urls_not_list(self): def test_invalid_start_urls_not_list(self):
"""Test invalid start_urls (not a list)""" """Test invalid start_urls (not a list)"""
config = {"name": "test", "base_url": "https://example.com/", "start_urls": "https://example.com/page1"} config = {
"name": "test",
"base_url": "https://example.com/",
"start_urls": "https://example.com/page1",
}
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertTrue(any("start_urls" in error.lower() and "list" in error.lower() for error in errors)) self.assertTrue(
any("start_urls" in error.lower() and "list" in error.lower() for error in errors)
)
def test_invalid_start_urls_bad_protocol(self): def test_invalid_start_urls_bad_protocol(self):
"""Test invalid start_urls (bad protocol)""" """Test invalid start_urls (bad protocol)"""
config = {"name": "test", "base_url": "https://example.com/", "start_urls": ["ftp://example.com/page1"]} config = {
"name": "test",
"base_url": "https://example.com/",
"start_urls": ["ftp://example.com/page1"],
}
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertTrue(any("start_url" in error.lower() for error in errors)) self.assertTrue(any("start_url" in error.lower() for error in errors))
@@ -193,7 +228,11 @@ class TestConfigValidation(unittest.TestCase):
config = { config = {
"name": "test", "name": "test",
"base_url": "https://example.com/", "base_url": "https://example.com/",
"start_urls": ["https://example.com/page1", "http://example.com/page2", "https://example.com/api/docs"], "start_urls": [
"https://example.com/page1",
"http://example.com/page2",
"https://example.com/api/docs",
],
} }
errors, _ = validate_config(config) errors, _ = validate_config(config)
url_errors = [e for e in errors if "start_url" in e.lower()] url_errors = [e for e in errors if "start_url" in e.lower()]

View File

@@ -153,7 +153,9 @@ class TestConstantsExports(unittest.TestCase):
self.assertTrue(hasattr(constants, "__all__")) self.assertTrue(hasattr(constants, "__all__"))
for name in constants.__all__: for name in constants.__all__:
self.assertTrue(hasattr(constants, name), f"Constant '{name}' in __all__ but not defined") self.assertTrue(
hasattr(constants, name), f"Constant '{name}' in __all__ but not defined"
)
def test_all_exports_count(self): def test_all_exports_count(self):
"""Test that __all__ has expected number of exports.""" """Test that __all__ has expected number of exports."""

View File

@@ -54,7 +54,9 @@ function greet(name) {
""") """)
# Create mock three-stream data # Create mock three-stream data
code_stream = CodeStream(directory=tmp_path, files=[tmp_path / "main.py", tmp_path / "utils.js"]) code_stream = CodeStream(
directory=tmp_path, files=[tmp_path / "main.py", tmp_path / "utils.js"]
)
docs_stream = DocsStream( docs_stream = DocsStream(
readme="""# Test Project readme="""# Test Project
@@ -74,10 +76,17 @@ hello()
``` ```
""", """,
contributing="# Contributing\n\nPull requests welcome!", contributing="# Contributing\n\nPull requests welcome!",
docs_files=[{"path": "docs/guide.md", "content": "# User Guide\n\nHow to use this project."}], docs_files=[
{"path": "docs/guide.md", "content": "# User Guide\n\nHow to use this project."}
],
) )
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={"stars": 1234, "forks": 56, "language": "Python", "description": "A test project"}, metadata={
"stars": 1234,
"forks": 56,
"language": "Python",
"description": "A test project",
},
common_problems=[ common_problems=[
{ {
"title": "Installation fails on Windows", "title": "Installation fails on Windows",
@@ -95,7 +104,13 @@ hello()
}, },
], ],
known_solutions=[ known_solutions=[
{"title": "Fixed: Module not found", "number": 35, "state": "closed", "comments": 8, "labels": ["bug"]} {
"title": "Fixed: Module not found",
"number": 35,
"state": "closed",
"comments": 8,
"labels": ["bug"],
}
], ],
top_labels=[ top_labels=[
{"label": "bug", "count": 25}, {"label": "bug", "count": 25},
@@ -108,7 +123,9 @@ hello()
# Step 2: Run unified analyzer with basic depth # Step 2: Run unified analyzer with basic depth
analyzer = UnifiedCodebaseAnalyzer() analyzer = UnifiedCodebaseAnalyzer()
result = analyzer.analyze(source="https://github.com/test/project", depth="basic", fetch_github_metadata=True) result = analyzer.analyze(
source="https://github.com/test/project", depth="basic", fetch_github_metadata=True
)
# Step 3: Validate all three streams present # Step 3: Validate all three streams present
assert result.source_type == "github" assert result.source_type == "github"
@@ -151,7 +168,13 @@ hello()
"comments": 15, "comments": 15,
"labels": ["oauth", "token"], "labels": ["oauth", "token"],
}, },
{"title": "Async deadlock", "number": 40, "state": "open", "comments": 12, "labels": ["async", "bug"]}, {
"title": "Async deadlock",
"number": 40,
"state": "open",
"comments": 12,
"labels": ["async", "bug"],
},
{ {
"title": "Database connection lost", "title": "Database connection lost",
"number": 35, "number": 35,
@@ -162,8 +185,20 @@ hello()
] ]
solutions = [ solutions = [
{"title": "Fixed OAuth flow", "number": 30, "state": "closed", "comments": 8, "labels": ["oauth"]}, {
{"title": "Resolved async race", "number": 25, "state": "closed", "comments": 6, "labels": ["async"]}, "title": "Fixed OAuth flow",
"number": 30,
"state": "closed",
"comments": 8,
"labels": ["oauth"],
},
{
"title": "Resolved async race",
"number": 25,
"state": "closed",
"comments": 6,
"labels": ["async"],
},
] ]
topics = ["oauth", "auth", "authentication"] topics = ["oauth", "auth", "authentication"]
@@ -174,7 +209,9 @@ hello()
# Validate categorization # Validate categorization
assert "oauth" in categorized or "auth" in categorized or "authentication" in categorized assert "oauth" in categorized or "auth" in categorized or "authentication" in categorized
oauth_issues = ( oauth_issues = (
categorized.get("oauth", []) + categorized.get("auth", []) + categorized.get("authentication", []) categorized.get("oauth", [])
+ categorized.get("auth", [])
+ categorized.get("authentication", [])
) )
# Should have 3 OAuth-related issues (2 problems + 1 solution) # Should have 3 OAuth-related issues (2 problems + 1 solution)
@@ -245,7 +282,12 @@ testproject.run()
docs_files=[], docs_files=[],
) )
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={"stars": 5000, "forks": 250, "language": "Python", "description": "Fast test framework"}, metadata={
"stars": 5000,
"forks": 250,
"language": "Python",
"description": "Fast test framework",
},
common_problems=[ common_problems=[
{ {
"title": "OAuth setup fails", "title": "OAuth setup fails",
@@ -254,8 +296,20 @@ testproject.run()
"comments": 30, "comments": 30,
"labels": ["bug", "oauth"], "labels": ["bug", "oauth"],
}, },
{"title": "Async deadlock", "number": 142, "state": "open", "comments": 25, "labels": ["async", "bug"]}, {
{"title": "Token refresh issue", "number": 130, "state": "open", "comments": 20, "labels": ["oauth"]}, "title": "Async deadlock",
"number": 142,
"state": "open",
"comments": 25,
"labels": ["async", "bug"],
},
{
"title": "Token refresh issue",
"number": 130,
"state": "open",
"comments": 20,
"labels": ["oauth"],
},
], ],
known_solutions=[ known_solutions=[
{ {
@@ -265,7 +319,13 @@ testproject.run()
"comments": 15, "comments": 15,
"labels": ["oauth"], "labels": ["oauth"],
}, },
{"title": "Resolved async race", "number": 110, "state": "closed", "comments": 12, "labels": ["async"]}, {
"title": "Resolved async race",
"number": 110,
"state": "closed",
"comments": 12,
"labels": ["async"],
},
], ],
top_labels=[ top_labels=[
{"label": "oauth", "count": 45}, {"label": "oauth", "count": 45},
@@ -276,7 +336,9 @@ testproject.run()
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Generate router # Generate router
generator = RouterGenerator([str(config_path1), str(config_path2)], github_streams=github_streams) generator = RouterGenerator(
[str(config_path1), str(config_path2)], github_streams=github_streams
)
# Step 1: Validate GitHub metadata extracted # Step 1: Validate GitHub metadata extracted
assert generator.github_metadata is not None assert generator.github_metadata is not None
@@ -308,8 +370,14 @@ testproject.run()
# Validate examples section with converted questions (Fix 1) # Validate examples section with converted questions (Fix 1)
assert "## Examples" in skill_md assert "## Examples" in skill_md
# Issues converted to natural questions # Issues converted to natural questions
assert "how do i fix oauth setup" in skill_md.lower() or "how do i handle oauth setup" in skill_md.lower() assert (
assert "how do i handle async deadlock" in skill_md.lower() or "how do i fix async deadlock" in skill_md.lower() "how do i fix oauth setup" in skill_md.lower()
or "how do i handle oauth setup" in skill_md.lower()
)
assert (
"how do i handle async deadlock" in skill_md.lower()
or "how do i fix async deadlock" in skill_md.lower()
)
# Common Issues section may still exist with other issues # Common Issues section may still exist with other issues
# Note: Issue numbers may appear in Common Issues or Common Patterns sections # Note: Issue numbers may appear in Common Issues or Common Patterns sections
@@ -356,12 +424,26 @@ class TestE2EQualityMetrics:
# Create GitHub streams with realistic data # Create GitHub streams with realistic data
code_stream = CodeStream(directory=tmp_path, files=[]) code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme="# Test\n\nA short README.", contributing=None, docs_files=[]) docs_stream = DocsStream(
readme="# Test\n\nA short README.", contributing=None, docs_files=[]
)
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={"stars": 100, "forks": 10, "language": "Python", "description": "Test"}, metadata={"stars": 100, "forks": 10, "language": "Python", "description": "Test"},
common_problems=[ common_problems=[
{"title": "Issue 1", "number": 1, "state": "open", "comments": 5, "labels": ["bug"]}, {
{"title": "Issue 2", "number": 2, "state": "open", "comments": 3, "labels": ["bug"]}, "title": "Issue 1",
"number": 1,
"state": "open",
"comments": 5,
"labels": ["bug"],
},
{
"title": "Issue 2",
"number": 2,
"state": "open",
"comments": 3,
"labels": ["bug"],
},
], ],
known_solutions=[], known_solutions=[],
top_labels=[{"label": "bug", "count": 10}], top_labels=[{"label": "bug", "count": 10}],
@@ -382,7 +464,9 @@ class TestE2EQualityMetrics:
github_overhead = lines_with_github - lines_no_github github_overhead = lines_with_github - lines_no_github
# Validate overhead is within acceptable range (30-50 lines) # Validate overhead is within acceptable range (30-50 lines)
assert 20 <= github_overhead <= 60, f"GitHub overhead is {github_overhead} lines, expected 20-60" assert 20 <= github_overhead <= 60, (
f"GitHub overhead is {github_overhead} lines, expected 20-60"
)
def test_router_size_within_limits(self, tmp_path): def test_router_size_within_limits(self, tmp_path):
""" """
@@ -457,7 +541,9 @@ class TestE2EBackwardCompatibility:
code_stream = CodeStream(directory=tmp_path, files=[]) code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme=None, contributing=None, docs_files=[]) docs_stream = DocsStream(readme=None, contributing=None, docs_files=[])
insights_stream = InsightsStream(metadata={}, common_problems=[], known_solutions=[], top_labels=[]) insights_stream = InsightsStream(
metadata={}, common_problems=[], known_solutions=[], top_labels=[]
)
three_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) three_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
mock_fetcher.fetch.return_value = three_streams mock_fetcher.fetch.return_value = three_streams
@@ -490,8 +576,12 @@ class TestE2ETokenEfficiency:
# Create GitHub streams # Create GitHub streams
code_stream = CodeStream(directory=tmp_path, files=[tmp_path / "main.py"]) code_stream = CodeStream(directory=tmp_path, files=[tmp_path / "main.py"])
docs_stream = DocsStream(readme="# Test\n\nQuick start guide.", contributing=None, docs_files=[]) docs_stream = DocsStream(
insights_stream = InsightsStream(metadata={"stars": 100}, common_problems=[], known_solutions=[], top_labels=[]) readme="# Test\n\nQuick start guide.", contributing=None, docs_files=[]
)
insights_stream = InsightsStream(
metadata={"stars": 100}, common_problems=[], known_solutions=[], top_labels=[]
)
three_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) three_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Verify streams are separate (no duplication) # Verify streams are separate (no duplication)

View File

@@ -69,7 +69,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
import subprocess import subprocess
try: try:
result = subprocess.run(["skill-seekers", "estimate", "--help"], capture_output=True, text=True, timeout=5) result = subprocess.run(
["skill-seekers", "estimate", "--help"], capture_output=True, text=True, timeout=5
)
# Should return successfully (0 or 2 for argparse) # Should return successfully (0 or 2 for argparse)
self.assertIn(result.returncode, [0, 2]) self.assertIn(result.returncode, [0, 2])
@@ -83,7 +85,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
import subprocess import subprocess
try: try:
result = subprocess.run(["skill-seekers-estimate", "--help"], capture_output=True, text=True, timeout=5) result = subprocess.run(
["skill-seekers-estimate", "--help"], capture_output=True, text=True, timeout=5
)
# Should return successfully # Should return successfully
self.assertIn(result.returncode, [0, 2]) self.assertIn(result.returncode, [0, 2])
@@ -96,11 +100,15 @@ class TestEstimatePagesCLI(unittest.TestCase):
try: try:
# Run without config argument # Run without config argument
result = subprocess.run(["skill-seekers", "estimate"], capture_output=True, text=True, timeout=5) result = subprocess.run(
["skill-seekers", "estimate"], capture_output=True, text=True, timeout=5
)
# Should fail (non-zero exit code) or show usage # Should fail (non-zero exit code) or show usage
self.assertTrue( self.assertTrue(
result.returncode != 0 or "usage" in result.stderr.lower() or "usage" in result.stdout.lower() result.returncode != 0
or "usage" in result.stderr.lower()
or "usage" in result.stdout.lower()
) )
except FileNotFoundError: except FileNotFoundError:
self.skipTest("skill-seekers command not installed") self.skipTest("skill-seekers command not installed")
@@ -111,7 +119,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
try: try:
# Run with --all flag # Run with --all flag
result = subprocess.run(["skill-seekers", "estimate", "--all"], capture_output=True, text=True, timeout=10) result = subprocess.run(
["skill-seekers", "estimate", "--all"], capture_output=True, text=True, timeout=10
)
# Should succeed # Should succeed
self.assertEqual(result.returncode, 0) self.assertEqual(result.returncode, 0)
@@ -125,7 +135,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
# Should list some known configs # Should list some known configs
# (these should exist in api/configs_repo/official/) # (these should exist in api/configs_repo/official/)
self.assertTrue( self.assertTrue(
"react" in output.lower() or "django" in output.lower() or "godot" in output.lower(), "react" in output.lower()
or "django" in output.lower()
or "godot" in output.lower(),
"Expected at least one known config name in output", "Expected at least one known config name in output",
) )
except FileNotFoundError: except FileNotFoundError:
@@ -136,7 +148,9 @@ class TestEstimatePagesCLI(unittest.TestCase):
import subprocess import subprocess
try: try:
result = subprocess.run(["skill-seekers-estimate", "--all"], capture_output=True, text=True, timeout=10) result = subprocess.run(
["skill-seekers-estimate", "--all"], capture_output=True, text=True, timeout=10
)
# Should succeed # Should succeed
self.assertEqual(result.returncode, 0) self.assertEqual(result.returncode, 0)

View File

@@ -60,7 +60,10 @@ class TestExcludedDirsAdditional(unittest.TestCase):
@patch("skill_seekers.cli.github_scraper.Github") @patch("skill_seekers.cli.github_scraper.Github")
def test_extend_with_additional_dirs(self, mock_github): def test_extend_with_additional_dirs(self, mock_github):
"""Test adding custom exclusions to defaults.""" """Test adding custom exclusions to defaults."""
config = {"repo": "owner/repo", "exclude_dirs_additional": ["proprietary", "vendor", "third_party"]} config = {
"repo": "owner/repo",
"exclude_dirs_additional": ["proprietary", "vendor", "third_party"],
}
scraper = GitHubScraper(config) scraper = GitHubScraper(config)
@@ -185,7 +188,11 @@ class TestExcludedDirsEdgeCases(unittest.TestCase):
"""Test that duplicates in additional list are handled (set deduplication).""" """Test that duplicates in additional list are handled (set deduplication)."""
config = { config = {
"repo": "owner/repo", "repo": "owner/repo",
"exclude_dirs_additional": ["venv", "custom", "venv"], # venv is duplicate (default + listed) "exclude_dirs_additional": [
"venv",
"custom",
"venv",
], # venv is duplicate (default + listed)
} }
scraper = GitHubScraper(config) scraper = GitHubScraper(config)
@@ -240,7 +247,11 @@ class TestExcludedDirsWithLocalRepo(unittest.TestCase):
@patch("skill_seekers.cli.github_scraper.Github") @patch("skill_seekers.cli.github_scraper.Github")
def test_replace_mode_with_local_repo_path(self, mock_github): def test_replace_mode_with_local_repo_path(self, mock_github):
"""Test that replace mode works with local_repo_path.""" """Test that replace mode works with local_repo_path."""
config = {"repo": "owner/repo", "local_repo_path": "/tmp/test/repo", "exclude_dirs": ["only_this"]} config = {
"repo": "owner/repo",
"local_repo_path": "/tmp/test/repo",
"exclude_dirs": ["only_this"],
}
scraper = GitHubScraper(config) scraper = GitHubScraper(config)
@@ -277,7 +288,10 @@ class TestExcludedDirsLogging(unittest.TestCase):
# Should have logged WARNING message # Should have logged WARNING message
warning_calls = [str(call) for call in mock_logger.warning.call_args_list] warning_calls = [str(call) for call in mock_logger.warning.call_args_list]
self.assertTrue( self.assertTrue(
any("Using custom directory exclusions" in call and "defaults overridden" in call for call in warning_calls) any(
"Using custom directory exclusions" in call and "defaults overridden" in call
for call in warning_calls
)
) )
@patch("skill_seekers.cli.github_scraper.Github") @patch("skill_seekers.cli.github_scraper.Github")

View File

@@ -105,9 +105,16 @@ class TestRouterGeneratorWithGitHub:
# Create GitHub streams # Create GitHub streams
code_stream = CodeStream(directory=tmp_path, files=[]) code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme="# Test Project\n\nA test OAuth library.", contributing=None, docs_files=[]) docs_stream = DocsStream(
readme="# Test Project\n\nA test OAuth library.", contributing=None, docs_files=[]
)
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={"stars": 1234, "forks": 56, "language": "Python", "description": "OAuth helper"}, metadata={
"stars": 1234,
"forks": 56,
"language": "Python",
"description": "OAuth helper",
},
common_problems=[ common_problems=[
{ {
"title": "OAuth fails on redirect", "title": "OAuth fails on redirect",
@@ -133,7 +140,11 @@ class TestRouterGeneratorWithGitHub:
def test_extract_keywords_with_github_labels(self, tmp_path): def test_extract_keywords_with_github_labels(self, tmp_path):
"""Test keyword extraction with GitHub issue labels (2x weight).""" """Test keyword extraction with GitHub issue labels (2x weight)."""
config = {"name": "test-oauth", "base_url": "https://example.com", "categories": {"oauth": ["oauth", "auth"]}} config = {
"name": "test-oauth",
"base_url": "https://example.com",
"categories": {"oauth": ["oauth", "auth"]},
}
config_path = tmp_path / "config.json" config_path = tmp_path / "config.json"
with open(config_path, "w") as f: with open(config_path, "w") as f:
@@ -178,10 +189,17 @@ class TestRouterGeneratorWithGitHub:
# Create GitHub streams # Create GitHub streams
code_stream = CodeStream(directory=tmp_path, files=[]) code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream( docs_stream = DocsStream(
readme="# OAuth Library\n\nQuick start: Install with pip install oauth", contributing=None, docs_files=[] readme="# OAuth Library\n\nQuick start: Install with pip install oauth",
contributing=None,
docs_files=[],
) )
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={"stars": 5000, "forks": 200, "language": "Python", "description": "OAuth 2.0 library"}, metadata={
"stars": 5000,
"forks": 200,
"language": "Python",
"description": "OAuth 2.0 library",
},
common_problems=[ common_problems=[
{ {
"title": "Redirect URI mismatch", "title": "Redirect URI mismatch",
@@ -190,7 +208,13 @@ class TestRouterGeneratorWithGitHub:
"comments": 25, "comments": 25,
"labels": ["bug", "oauth"], "labels": ["bug", "oauth"],
}, },
{"title": "Token refresh fails", "number": 95, "state": "open", "comments": 18, "labels": ["oauth"]}, {
"title": "Token refresh fails",
"number": 95,
"state": "open",
"comments": 18,
"labels": ["oauth"],
},
], ],
known_solutions=[], known_solutions=[],
top_labels=[], top_labels=[],
@@ -250,7 +274,11 @@ class TestSubSkillIssuesSection:
def test_generate_subskill_issues_section(self, tmp_path): def test_generate_subskill_issues_section(self, tmp_path):
"""Test generation of issues section for sub-skills.""" """Test generation of issues section for sub-skills."""
config = {"name": "test-oauth", "base_url": "https://example.com", "categories": {"oauth": ["oauth"]}} config = {
"name": "test-oauth",
"base_url": "https://example.com",
"categories": {"oauth": ["oauth"]},
}
config_path = tmp_path / "config.json" config_path = tmp_path / "config.json"
with open(config_path, "w") as f: with open(config_path, "w") as f:
@@ -269,10 +297,22 @@ class TestSubSkillIssuesSection:
"comments": 20, "comments": 20,
"labels": ["oauth", "bug"], "labels": ["oauth", "bug"],
}, },
{"title": "Token expiration issue", "number": 45, "state": "open", "comments": 15, "labels": ["oauth"]}, {
"title": "Token expiration issue",
"number": 45,
"state": "open",
"comments": 15,
"labels": ["oauth"],
},
], ],
known_solutions=[ known_solutions=[
{"title": "Fixed OAuth flow", "number": 40, "state": "closed", "comments": 10, "labels": ["oauth"]} {
"title": "Fixed OAuth flow",
"number": 40,
"state": "closed",
"comments": 10,
"labels": ["oauth"],
}
], ],
top_labels=[], top_labels=[],
) )
@@ -293,7 +333,11 @@ class TestSubSkillIssuesSection:
def test_generate_subskill_issues_no_matches(self, tmp_path): def test_generate_subskill_issues_no_matches(self, tmp_path):
"""Test issues section when no issues match the topic.""" """Test issues section when no issues match the topic."""
config = {"name": "test-async", "base_url": "https://example.com", "categories": {"async": ["async"]}} config = {
"name": "test-async",
"base_url": "https://example.com",
"categories": {"async": ["async"]},
}
config_path = tmp_path / "config.json" config_path = tmp_path / "config.json"
with open(config_path, "w") as f: with open(config_path, "w") as f:
@@ -305,7 +349,13 @@ class TestSubSkillIssuesSection:
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={}, metadata={},
common_problems=[ common_problems=[
{"title": "OAuth fails", "number": 1, "state": "open", "comments": 5, "labels": ["oauth"]} {
"title": "OAuth fails",
"number": 1,
"state": "open",
"comments": 5,
"labels": ["oauth"],
}
], ],
known_solutions=[], known_solutions=[],
top_labels=[], top_labels=[],
@@ -361,7 +411,12 @@ class TestIntegration:
], ],
) )
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={"stars": 10000, "forks": 500, "language": "Python", "description": "Fast MCP server framework"}, metadata={
"stars": 10000,
"forks": 500,
"language": "Python",
"description": "Fast MCP server framework",
},
common_problems=[ common_problems=[
{ {
"title": "OAuth setup fails", "title": "OAuth setup fails",
@@ -370,8 +425,20 @@ class TestIntegration:
"comments": 30, "comments": 30,
"labels": ["bug", "oauth"], "labels": ["bug", "oauth"],
}, },
{"title": "Async deadlock", "number": 142, "state": "open", "comments": 25, "labels": ["async", "bug"]}, {
{"title": "Token refresh issue", "number": 130, "state": "open", "comments": 20, "labels": ["oauth"]}, "title": "Async deadlock",
"number": 142,
"state": "open",
"comments": 25,
"labels": ["async", "bug"],
},
{
"title": "Token refresh issue",
"number": 130,
"state": "open",
"comments": 20,
"labels": ["oauth"],
},
], ],
known_solutions=[ known_solutions=[
{ {
@@ -381,7 +448,13 @@ class TestIntegration:
"comments": 15, "comments": 15,
"labels": ["oauth"], "labels": ["oauth"],
}, },
{"title": "Resolved async race", "number": 110, "state": "closed", "comments": 12, "labels": ["async"]}, {
"title": "Resolved async race",
"number": 110,
"state": "closed",
"comments": 12,
"labels": ["async"],
},
], ],
top_labels=[ top_labels=[
{"label": "oauth", "count": 45}, {"label": "oauth", "count": 45},
@@ -392,7 +465,9 @@ class TestIntegration:
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Create router generator # Create router generator
generator = RouterGenerator([str(config_path1), str(config_path2)], github_streams=github_streams) generator = RouterGenerator(
[str(config_path1), str(config_path2)], github_streams=github_streams
)
# Generate SKILL.md # Generate SKILL.md
skill_md = generator.generate_skill_md() skill_md = generator.generate_skill_md()
@@ -414,8 +489,14 @@ class TestIntegration:
# 4. Examples section with converted questions (Fix 1) # 4. Examples section with converted questions (Fix 1)
assert "## Examples" in skill_md assert "## Examples" in skill_md
# Issues converted to natural questions # Issues converted to natural questions
assert "how do i fix oauth setup" in skill_md.lower() or "how do i handle oauth setup" in skill_md.lower() assert (
assert "how do i handle async deadlock" in skill_md.lower() or "how do i fix async deadlock" in skill_md.lower() "how do i fix oauth setup" in skill_md.lower()
or "how do i handle oauth setup" in skill_md.lower()
)
assert (
"how do i handle async deadlock" in skill_md.lower()
or "how do i fix async deadlock" in skill_md.lower()
)
# Common Issues section may still exist with other issues # Common Issues section may still exist with other issues
# Note: Issue numbers may appear in Common Issues or Common Patterns sections # Note: Issue numbers may appear in Common Issues or Common Patterns sections

View File

@@ -134,7 +134,9 @@ class TestCloneOrPull:
"""Test cloning a new repository.""" """Test cloning a new repository."""
mock_clone.return_value = MagicMock() mock_clone.return_value = MagicMock()
result = git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git") result = git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git"
)
assert result == git_repo.cache_dir / "test-source" assert result == git_repo.cache_dir / "test-source"
mock_clone.assert_called_once() mock_clone.assert_called_once()
@@ -159,7 +161,9 @@ class TestCloneOrPull:
mock_repo.remotes.origin = mock_origin mock_repo.remotes.origin = mock_origin
mock_repo_class.return_value = mock_repo mock_repo_class.return_value = mock_repo
result = git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git") result = git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git"
)
assert result == repo_path assert result == repo_path
mock_origin.pull.assert_called_once_with("main") mock_origin.pull.assert_called_once_with("main")
@@ -179,7 +183,9 @@ class TestCloneOrPull:
mock_repo_class.return_value = mock_repo mock_repo_class.return_value = mock_repo
result = git_repo.clone_or_pull( result = git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git", token="ghp_token123" source_name="test-source",
git_url="https://github.com/org/repo.git",
token="ghp_token123",
) )
# Verify URL was updated with token # Verify URL was updated with token
@@ -198,7 +204,9 @@ class TestCloneOrPull:
mock_clone.return_value = MagicMock() mock_clone.return_value = MagicMock()
git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git", force_refresh=True) git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git", force_refresh=True
)
# Verify clone was called (not pull) # Verify clone was called (not pull)
mock_clone.assert_called_once() mock_clone.assert_called_once()
@@ -208,7 +216,9 @@ class TestCloneOrPull:
"""Test cloning with custom branch.""" """Test cloning with custom branch."""
mock_clone.return_value = MagicMock() mock_clone.return_value = MagicMock()
git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git", branch="develop") git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git", branch="develop"
)
call_kwargs = mock_clone.call_args[1] call_kwargs = mock_clone.call_args[1]
assert call_kwargs["branch"] == "develop" assert call_kwargs["branch"] == "develop"
@@ -221,10 +231,14 @@ class TestCloneOrPull:
@patch("skill_seekers.mcp.git_repo.git.Repo.clone_from") @patch("skill_seekers.mcp.git_repo.git.Repo.clone_from")
def test_clone_auth_failure_error(self, mock_clone, git_repo): def test_clone_auth_failure_error(self, mock_clone, git_repo):
"""Test authentication failure error handling.""" """Test authentication failure error handling."""
mock_clone.side_effect = GitCommandError("clone", 128, stderr="fatal: Authentication failed") mock_clone.side_effect = GitCommandError(
"clone", 128, stderr="fatal: Authentication failed"
)
with pytest.raises(GitCommandError, match="Authentication failed"): with pytest.raises(GitCommandError, match="Authentication failed"):
git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/repo.git") git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/repo.git"
)
@patch("skill_seekers.mcp.git_repo.git.Repo.clone_from") @patch("skill_seekers.mcp.git_repo.git.Repo.clone_from")
def test_clone_not_found_error(self, mock_clone, git_repo): def test_clone_not_found_error(self, mock_clone, git_repo):
@@ -232,7 +246,9 @@ class TestCloneOrPull:
mock_clone.side_effect = GitCommandError("clone", 128, stderr="fatal: repository not found") mock_clone.side_effect = GitCommandError("clone", 128, stderr="fatal: repository not found")
with pytest.raises(GitCommandError, match="Repository not found"): with pytest.raises(GitCommandError, match="Repository not found"):
git_repo.clone_or_pull(source_name="test-source", git_url="https://github.com/org/nonexistent.git") git_repo.clone_or_pull(
source_name="test-source", git_url="https://github.com/org/nonexistent.git"
)
class TestFindConfigs: class TestFindConfigs:

View File

@@ -276,7 +276,9 @@ class TestGitSourcesE2E:
git_repo = GitConfigRepo(cache_dir=cache_dir) git_repo = GitConfigRepo(cache_dir=cache_dir)
# Step 1: Clone repository # Step 1: Clone repository
repo_path = git_repo.clone_or_pull(source_name="test-pull", git_url=git_url, branch="master") repo_path = git_repo.clone_or_pull(
source_name="test-pull", git_url=git_url, branch="master"
)
initial_configs = git_repo.find_configs(repo_path) initial_configs = git_repo.find_configs(repo_path)
assert len(initial_configs) == 3 assert len(initial_configs) == 3
@@ -333,7 +335,9 @@ class TestGitSourcesE2E:
git_repo = GitConfigRepo(cache_dir=cache_dir) git_repo = GitConfigRepo(cache_dir=cache_dir)
# Step 1: Clone repository # Step 1: Clone repository
repo_path = git_repo.clone_or_pull(source_name="test-refresh", git_url=git_url, branch="master") repo_path = git_repo.clone_or_pull(
source_name="test-refresh", git_url=git_url, branch="master"
)
# Step 2: Modify local cache manually # Step 2: Modify local cache manually
corrupt_file = repo_path / "CORRUPTED.txt" corrupt_file = repo_path / "CORRUPTED.txt"
@@ -371,7 +375,9 @@ class TestGitSourcesE2E:
git_repo = GitConfigRepo(cache_dir=cache_dir) git_repo = GitConfigRepo(cache_dir=cache_dir)
# Step 1: Clone repository # Step 1: Clone repository
repo_path = git_repo.clone_or_pull(source_name="test-not-found", git_url=git_url, branch="master") repo_path = git_repo.clone_or_pull(
source_name="test-not-found", git_url=git_url, branch="master"
)
# Step 2: Try to fetch non-existent config # Step 2: Try to fetch non-existent config
with pytest.raises(FileNotFoundError) as exc_info: with pytest.raises(FileNotFoundError) as exc_info:
@@ -401,7 +407,9 @@ class TestGitSourcesE2E:
for invalid_url in invalid_urls: for invalid_url in invalid_urls:
with pytest.raises(ValueError, match="Invalid git URL"): with pytest.raises(ValueError, match="Invalid git URL"):
git_repo.clone_or_pull(source_name="test-invalid", git_url=invalid_url, branch="master") git_repo.clone_or_pull(
source_name="test-invalid", git_url=invalid_url, branch="master"
)
def test_e2e_source_name_validation(self, temp_dirs): def test_e2e_source_name_validation(self, temp_dirs):
""" """
@@ -496,11 +504,15 @@ class TestGitSourcesE2E:
# Step 1: Clone to cache_dir_1 # Step 1: Clone to cache_dir_1
git_repo_1 = GitConfigRepo(cache_dir=cache_dir_1) git_repo_1 = GitConfigRepo(cache_dir=cache_dir_1)
repo_path_1 = git_repo_1.clone_or_pull(source_name="test-source", git_url=git_url, branch="master") repo_path_1 = git_repo_1.clone_or_pull(
source_name="test-source", git_url=git_url, branch="master"
)
# Step 2: Clone same repo to cache_dir_2 # Step 2: Clone same repo to cache_dir_2
git_repo_2 = GitConfigRepo(cache_dir=cache_dir_2) git_repo_2 = GitConfigRepo(cache_dir=cache_dir_2)
repo_path_2 = git_repo_2.clone_or_pull(source_name="test-source", git_url=git_url, branch="master") repo_path_2 = git_repo_2.clone_or_pull(
source_name="test-source", git_url=git_url, branch="master"
)
# Step 3: Verify both caches are independent # Step 3: Verify both caches are independent
assert repo_path_1 != repo_path_2 assert repo_path_1 != repo_path_2
@@ -621,7 +633,9 @@ class TestGitSourcesE2E:
repo.index.commit("Increase React config max_pages to 500") repo.index.commit("Increase React config max_pages to 500")
# Step 6: Developers pull updates # Step 6: Developers pull updates
git_repo.clone_or_pull(source_name=source["name"], git_url=source["git_url"], branch=source["branch"]) git_repo.clone_or_pull(
source_name=source["name"], git_url=source["git_url"], branch=source["branch"]
)
updated_config = git_repo.get_config(repo_path, "react") updated_config = git_repo.get_config(repo_path, "react")
assert updated_config["max_pages"] == 500 assert updated_config["max_pages"] == 500
@@ -631,7 +645,9 @@ class TestGitSourcesE2E:
repo.index.remove(["react.json"]) repo.index.remove(["react.json"])
repo.index.commit("Remove react.json") repo.index.commit("Remove react.json")
git_repo.clone_or_pull(source_name=source["name"], git_url=source["git_url"], branch=source["branch"]) git_repo.clone_or_pull(
source_name=source["name"], git_url=source["git_url"], branch=source["branch"]
)
# Step 8: Error handling works correctly # Step 8: Error handling works correctly
with pytest.raises(FileNotFoundError, match="react.json"): with pytest.raises(FileNotFoundError, match="react.json"):
@@ -700,7 +716,11 @@ class TestMCPToolsE2E:
""" """
MCP E2E Test 1: Complete add/list/remove workflow via MCP tools MCP E2E Test 1: Complete add/list/remove workflow via MCP tools
""" """
from skill_seekers.mcp.server import add_config_source_tool, list_config_sources_tool, remove_config_source_tool from skill_seekers.mcp.server import (
add_config_source_tool,
list_config_sources_tool,
remove_config_source_tool,
)
cache_dir, config_dir = temp_dirs cache_dir, config_dir = temp_dirs
repo_dir, repo = temp_git_repo repo_dir, repo = temp_git_repo
@@ -708,7 +728,12 @@ class TestMCPToolsE2E:
# Add source # Add source
add_result = await add_config_source_tool( add_result = await add_config_source_tool(
{"name": "mcp-test-source", "git_url": git_url, "source_type": "custom", "branch": "master"} {
"name": "mcp-test-source",
"git_url": git_url,
"source_type": "custom",
"branch": "master",
}
) )
assert len(add_result) == 1 assert len(add_result) == 1
@@ -744,7 +769,12 @@ class TestMCPToolsE2E:
dest_dir.mkdir(parents=True, exist_ok=True) dest_dir.mkdir(parents=True, exist_ok=True)
result = await fetch_config_tool( result = await fetch_config_tool(
{"config_name": "test-framework", "git_url": git_url, "branch": "master", "destination": str(dest_dir)} {
"config_name": "test-framework",
"git_url": git_url,
"branch": "master",
"destination": str(dest_dir),
}
) )
assert len(result) == 1 assert len(result) == 1
@@ -831,10 +861,16 @@ class TestMCPToolsE2E:
assert "" in result[0].text or "not found" in result[0].text.lower() assert "" in result[0].text or "not found" in result[0].text.lower()
# Test 5: Fetch non-existent config from valid source # Test 5: Fetch non-existent config from valid source
await add_config_source_tool({"name": "valid-source", "git_url": git_url, "branch": "master"}) await add_config_source_tool(
{"name": "valid-source", "git_url": git_url, "branch": "master"}
)
result = await fetch_config_tool( result = await fetch_config_tool(
{"config_name": "non-existent-config", "source": "valid-source", "destination": str(dest_dir)} {
"config_name": "non-existent-config",
"source": "valid-source",
"destination": str(dest_dir),
}
) )
assert "" in result[0].text or "not found" in result[0].text.lower() assert "" in result[0].text or "not found" in result[0].text.lower()

View File

@@ -189,7 +189,13 @@ class TestIssueAnalysis:
def test_analyze_issues_known_solutions(self): def test_analyze_issues_known_solutions(self):
"""Test extraction of known solutions (closed issues with comments).""" """Test extraction of known solutions (closed issues with comments)."""
issues = [ issues = [
{"title": "Fixed OAuth", "number": 35, "state": "closed", "comments": 5, "labels": [{"name": "bug"}]}, {
"title": "Fixed OAuth",
"number": 35,
"state": "closed",
"comments": 5,
"labels": [{"name": "bug"}],
},
{ {
"title": "Closed without comments", "title": "Closed without comments",
"number": 36, "number": 36,
@@ -239,7 +245,10 @@ class TestIssueAnalysis:
assert len(insights["common_problems"]) <= 10 assert len(insights["common_problems"]) <= 10
# Should be sorted by comment count (descending) # Should be sorted by comment count (descending)
if len(insights["common_problems"]) > 1: if len(insights["common_problems"]) > 1:
assert insights["common_problems"][0]["comments"] >= insights["common_problems"][1]["comments"] assert (
insights["common_problems"][0]["comments"]
>= insights["common_problems"][1]["comments"]
)
class TestGitHubAPI: class TestGitHubAPI:
@@ -286,7 +295,13 @@ class TestGitHubAPI:
"""Test fetching issues via GitHub API.""" """Test fetching issues via GitHub API."""
mock_response = Mock() mock_response = Mock()
mock_response.json.return_value = [ mock_response.json.return_value = [
{"title": "Bug", "number": 42, "state": "open", "comments": 10, "labels": [{"name": "bug"}]} {
"title": "Bug",
"number": 42,
"state": "open",
"comments": 10,
"labels": [{"name": "bug"}],
}
] ]
mock_response.raise_for_status = Mock() mock_response.raise_for_status = Mock()
mock_get.return_value = mock_response mock_get.return_value = mock_response
@@ -304,7 +319,14 @@ class TestGitHubAPI:
mock_response = Mock() mock_response = Mock()
mock_response.json.return_value = [ mock_response.json.return_value = [
{"title": "Issue", "number": 42, "state": "open", "comments": 5, "labels": []}, {"title": "Issue", "number": 42, "state": "open", "comments": 5, "labels": []},
{"title": "PR", "number": 43, "state": "open", "comments": 3, "labels": [], "pull_request": {}}, {
"title": "PR",
"number": 43,
"state": "open",
"comments": 3,
"labels": [],
"pull_request": {},
},
] ]
mock_response.raise_for_status = Mock() mock_response.raise_for_status = Mock()
mock_get.return_value = mock_response mock_get.return_value = mock_response
@@ -376,7 +398,13 @@ class TestIntegration:
else: else:
# Issues call # Issues call
mock_response.json.return_value = [ mock_response.json.return_value = [
{"title": "Test Issue", "number": 42, "state": "open", "comments": 10, "labels": [{"name": "bug"}]} {
"title": "Test Issue",
"number": 42,
"state": "open",
"comments": 10,
"labels": [{"name": "bug"}],
}
] ]
return mock_response return mock_response

View File

@@ -587,7 +587,9 @@ class TestGitHubToSkillConverter(unittest.TestCase):
config = {"repo": "facebook/react", "name": "test", "description": "Test skill"} config = {"repo": "facebook/react", "name": "test", "description": "Test skill"}
# Patch the paths to use our temp directory # Patch the paths to use our temp directory
with patch("skill_seekers.cli.github_scraper.GitHubToSkillConverter._load_data") as mock_load: with patch(
"skill_seekers.cli.github_scraper.GitHubToSkillConverter._load_data"
) as mock_load:
mock_load.return_value = self.mock_data mock_load.return_value = self.mock_data
converter = self.GitHubToSkillConverter(config) converter = self.GitHubToSkillConverter(config)
converter.skill_dir = str(self.output_dir / "test_skill") converter.skill_dir = str(self.output_dir / "test_skill")
@@ -677,7 +679,10 @@ class TestSymlinkHandling(unittest.TestCase):
scraper.repo = Mock() scraper.repo = Mock()
# First call returns symlink, second call raises 404 # First call returns symlink, second call raises 404
scraper.repo.get_contents.side_effect = [mock_symlink, GithubException(404, "Not found")] scraper.repo.get_contents.side_effect = [
mock_symlink,
GithubException(404, "Not found"),
]
result = scraper._get_file_content("README.md") result = scraper._get_file_content("README.md")
@@ -729,7 +734,9 @@ class TestSymlinkHandling(unittest.TestCase):
# Should successfully extract README content # Should successfully extract README content
self.assertIn("readme", scraper.extracted_data) self.assertIn("readme", scraper.extracted_data)
self.assertEqual(scraper.extracted_data["readme"], "# AI SDK\n\nThe AI SDK is a TypeScript toolkit") self.assertEqual(
scraper.extracted_data["readme"], "# AI SDK\n\nThe AI SDK is a TypeScript toolkit"
)
def test_extract_changelog_with_symlink(self): def test_extract_changelog_with_symlink(self):
"""Test CHANGELOG extraction with symlinked CHANGELOG.md""" """Test CHANGELOG extraction with symlinked CHANGELOG.md"""
@@ -789,7 +796,9 @@ class TestSymlinkHandling(unittest.TestCase):
mock_content.type = "file" mock_content.type = "file"
mock_content.encoding = "none" # Large files have encoding="none" mock_content.encoding = "none" # Large files have encoding="none"
mock_content.size = 1388271 # 1.4MB CHANGELOG mock_content.size = 1388271 # 1.4MB CHANGELOG
mock_content.download_url = "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md" mock_content.download_url = (
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
)
with patch("skill_seekers.cli.github_scraper.Github"): with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config) scraper = self.GitHubScraper(config)
@@ -820,7 +829,9 @@ class TestSymlinkHandling(unittest.TestCase):
mock_content.type = "file" mock_content.type = "file"
mock_content.encoding = "none" mock_content.encoding = "none"
mock_content.size = 1388271 mock_content.size = 1388271
mock_content.download_url = "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md" mock_content.download_url = (
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
)
with patch("skill_seekers.cli.github_scraper.Github"): with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config) scraper = self.GitHubScraper(config)

View File

@@ -15,7 +15,12 @@ from unittest.mock import MagicMock, Mock, patch
import pytest import pytest
from skill_seekers.cli.guide_enhancer import GuideEnhancer, PrerequisiteItem, StepEnhancement, TroubleshootingItem from skill_seekers.cli.guide_enhancer import (
GuideEnhancer,
PrerequisiteItem,
StepEnhancement,
TroubleshootingItem,
)
class TestGuideEnhancerModeDetection: class TestGuideEnhancerModeDetection:
@@ -25,7 +30,9 @@ class TestGuideEnhancerModeDetection:
"""Test auto mode detects API when key present and library available""" """Test auto mode detects API when key present and library available"""
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}): with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True): with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic: with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="auto") enhancer = GuideEnhancer(mode="auto")
# Will be 'api' if library available, otherwise 'local' or 'none' # Will be 'api' if library available, otherwise 'local' or 'none'
@@ -96,7 +103,9 @@ class TestGuideEnhancerStepDescriptions:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}): with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True): with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic: with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api": if enhancer.mode != "api":
@@ -104,7 +113,12 @@ class TestGuideEnhancerStepDescriptions:
enhancer.client = Mock() # Mock the client enhancer.client = Mock() # Mock the client
steps = [{"description": "scraper.scrape(url)", "code": "result = scraper.scrape(url)"}] steps = [
{
"description": "scraper.scrape(url)",
"code": "result = scraper.scrape(url)",
}
]
result = enhancer.enhance_step_descriptions(steps) result = enhancer.enhance_step_descriptions(steps)
assert len(result) == 1 assert len(result) == 1
@@ -129,7 +143,11 @@ class TestGuideEnhancerTroubleshooting:
def test_enhance_troubleshooting_none_mode(self): def test_enhance_troubleshooting_none_mode(self):
"""Test troubleshooting in none mode""" """Test troubleshooting in none mode"""
enhancer = GuideEnhancer(mode="none") enhancer = GuideEnhancer(mode="none")
guide_data = {"title": "Test Guide", "steps": [{"description": "test", "code": "code"}], "language": "python"} guide_data = {
"title": "Test Guide",
"steps": [{"description": "test", "code": "code"}],
"language": "python",
}
result = enhancer.enhance_troubleshooting(guide_data) result = enhancer.enhance_troubleshooting(guide_data)
assert result == [] assert result == []
@@ -151,7 +169,9 @@ class TestGuideEnhancerTroubleshooting:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}): with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True): with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic: with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api": if enhancer.mode != "api":
@@ -196,7 +216,11 @@ class TestGuideEnhancerPrerequisites:
mock_call.return_value = json.dumps( mock_call.return_value = json.dumps(
{ {
"prerequisites_detailed": [ "prerequisites_detailed": [
{"name": "requests", "why": "HTTP client for making web requests", "setup": "pip install requests"}, {
"name": "requests",
"why": "HTTP client for making web requests",
"setup": "pip install requests",
},
{ {
"name": "beautifulsoup4", "name": "beautifulsoup4",
"why": "HTML/XML parser for web scraping", "why": "HTML/XML parser for web scraping",
@@ -208,7 +232,9 @@ class TestGuideEnhancerPrerequisites:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}): with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True): with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic: with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api": if enhancer.mode != "api":
@@ -240,12 +266,20 @@ class TestGuideEnhancerNextSteps:
def test_enhance_next_steps_api_mode(self, mock_call): def test_enhance_next_steps_api_mode(self, mock_call):
"""Test next steps with API mode""" """Test next steps with API mode"""
mock_call.return_value = json.dumps( mock_call.return_value = json.dumps(
{"next_steps": ["How to handle async workflows", "How to add error handling", "How to implement caching"]} {
"next_steps": [
"How to handle async workflows",
"How to add error handling",
"How to implement caching",
]
}
) )
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}): with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True): with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic: with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api": if enhancer.mode != "api":
@@ -285,7 +319,9 @@ class TestGuideEnhancerUseCases:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}): with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True): with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic: with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api": if enhancer.mode != "api":
@@ -293,7 +329,10 @@ class TestGuideEnhancerUseCases:
enhancer.client = Mock() enhancer.client = Mock()
guide_data = {"title": "How to Scrape Docs", "description": "Documentation scraping"} guide_data = {
"title": "How to Scrape Docs",
"description": "Documentation scraping",
}
result = enhancer.enhance_use_cases(guide_data) result = enhancer.enhance_use_cases(guide_data)
assert len(result) == 2 assert len(result) == 2
@@ -332,7 +371,11 @@ class TestGuideEnhancerFullWorkflow:
{ {
"step_descriptions": [ "step_descriptions": [
{"step_index": 0, "explanation": "Import required libraries", "variations": []}, {"step_index": 0, "explanation": "Import required libraries", "variations": []},
{"step_index": 1, "explanation": "Initialize scraper instance", "variations": []}, {
"step_index": 1,
"explanation": "Initialize scraper instance",
"variations": [],
},
], ],
"troubleshooting": [ "troubleshooting": [
{ {
@@ -342,7 +385,9 @@ class TestGuideEnhancerFullWorkflow:
"solution": "pip install requests", "solution": "pip install requests",
} }
], ],
"prerequisites_detailed": [{"name": "requests", "why": "HTTP client", "setup": "pip install requests"}], "prerequisites_detailed": [
{"name": "requests", "why": "HTTP client", "setup": "pip install requests"}
],
"next_steps": ["How to add authentication"], "next_steps": ["How to add authentication"],
"use_cases": ["Automate documentation extraction"], "use_cases": ["Automate documentation extraction"],
} }
@@ -350,7 +395,9 @@ class TestGuideEnhancerFullWorkflow:
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}): with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}):
with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True): with patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True):
with patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic: with patch(
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic:
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
if enhancer.mode != "api": if enhancer.mode != "api":
@@ -508,7 +555,11 @@ class TestGuideEnhancerResponseParsing:
} }
) )
guide_data = {"title": "Test", "steps": [{"description": "Test", "code": "test"}], "language": "python"} guide_data = {
"title": "Test",
"steps": [{"description": "Test", "code": "test"}],
"language": "python",
}
result = enhancer._parse_enhancement_response(response, guide_data) result = enhancer._parse_enhancement_response(response, guide_data)

View File

@@ -121,7 +121,10 @@ def test_workflow():
def test_calculate_complexity(self): def test_calculate_complexity(self):
"""Test complexity level calculation""" """Test complexity level calculation"""
# Simple workflow - beginner # Simple workflow - beginner
simple_steps = [WorkflowStep(1, "x = 1", "Assign variable"), WorkflowStep(2, "print(x)", "Print variable")] simple_steps = [
WorkflowStep(1, "x = 1", "Assign variable"),
WorkflowStep(2, "print(x)", "Print variable"),
]
simple_workflow = {"code": "x = 1\nprint(x)", "category": "workflow"} simple_workflow = {"code": "x = 1\nprint(x)", "category": "workflow"}
complexity_simple = self.analyzer._calculate_complexity(simple_steps, simple_workflow) complexity_simple = self.analyzer._calculate_complexity(simple_steps, simple_workflow)
self.assertEqual(complexity_simple, "beginner") self.assertEqual(complexity_simple, "beginner")
@@ -129,7 +132,9 @@ def test_workflow():
# Complex workflow - advanced # Complex workflow - advanced
complex_steps = [WorkflowStep(i, f"step{i}", f"Step {i}") for i in range(1, 8)] complex_steps = [WorkflowStep(i, f"step{i}", f"Step {i}") for i in range(1, 8)]
complex_workflow = { complex_workflow = {
"code": "\n".join([f"async def step{i}(): await complex_operation()" for i in range(7)]), "code": "\n".join(
[f"async def step{i}(): await complex_operation()" for i in range(7)]
),
"category": "workflow", "category": "workflow",
} }
complexity_complex = self.analyzer._calculate_complexity(complex_steps, complex_workflow) complexity_complex = self.analyzer._calculate_complexity(complex_steps, complex_workflow)
@@ -466,8 +471,12 @@ class TestHowToGuideBuilder(unittest.TestCase):
def test_create_collection(self): def test_create_collection(self):
"""Test guide collection creation with metadata""" """Test guide collection creation with metadata"""
guides = [ guides = [
HowToGuide(guide_id="guide-1", title="Guide 1", overview="Test", complexity_level="beginner"), HowToGuide(
HowToGuide(guide_id="guide-2", title="Guide 2", overview="Test", complexity_level="advanced"), guide_id="guide-1", title="Guide 1", overview="Test", complexity_level="beginner"
),
HowToGuide(
guide_id="guide-2", title="Guide 2", overview="Test", complexity_level="advanced"
),
] ]
collection = self.builder._create_collection(guides) collection = self.builder._create_collection(guides)
@@ -492,7 +501,10 @@ class TestHowToGuideBuilder(unittest.TestCase):
# Correct attribute names # Correct attribute names
collection = GuideCollection( collection = GuideCollection(
total_guides=1, guides=guides, guides_by_complexity={"beginner": 1}, guides_by_use_case={} total_guides=1,
guides=guides,
guides_by_complexity={"beginner": 1},
guides_by_use_case={},
) )
output_dir = Path(self.temp_dir) output_dir = Path(self.temp_dir)
@@ -905,7 +917,10 @@ def test_file_processing():
output_dir = Path(self.temp_dir) / "guides_fallback" output_dir = Path(self.temp_dir) / "guides_fallback"
# Mock GuideEnhancer to raise exception # Mock GuideEnhancer to raise exception
with patch("skill_seekers.cli.guide_enhancer.GuideEnhancer", side_effect=Exception("AI unavailable")): with patch(
"skill_seekers.cli.guide_enhancer.GuideEnhancer",
side_effect=Exception("AI unavailable"),
):
# Should NOT crash - graceful fallback # Should NOT crash - graceful fallback
collection = builder.build_guides_from_examples( collection = builder.build_guides_from_examples(
examples=examples, examples=examples,

View File

@@ -328,7 +328,9 @@ class TestInstallToAllAgents:
def mock_get_agent_path(agent_name, project_root=None): def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills" return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path): with patch(
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
results = install_to_all_agents(self.skill_dir, force=True) results = install_to_all_agents(self.skill_dir, force=True)
assert len(results) == 11 assert len(results) == 11
@@ -357,7 +359,9 @@ class TestInstallToAllAgents:
def mock_get_agent_path(agent_name, project_root=None): def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills" return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path): with patch(
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
# Without force - should fail # Without force - should fail
results_no_force = install_to_all_agents(self.skill_dir, force=False) results_no_force = install_to_all_agents(self.skill_dir, force=False)
# All should fail because directories exist # All should fail because directories exist
@@ -400,7 +404,10 @@ class TestInstallAgentCLI:
def test_cli_help_output(self): def test_cli_help_output(self):
"""Test that --help shows usage information.""" """Test that --help shows usage information."""
with pytest.raises(SystemExit) as exc_info, patch("sys.argv", ["install_agent.py", "--help"]): with (
pytest.raises(SystemExit) as exc_info,
patch("sys.argv", ["install_agent.py", "--help"]),
):
main() main()
# --help exits with code 0 # --help exits with code 0
@@ -422,8 +429,13 @@ class TestInstallAgentCLI:
def mock_get_agent_path(agent_name, project_root=None): def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills" return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path): with patch(
with patch("sys.argv", ["install_agent.py", str(self.skill_dir), "--agent", "claude", "--dry-run"]): "skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
with patch(
"sys.argv",
["install_agent.py", str(self.skill_dir), "--agent", "claude", "--dry-run"],
):
exit_code = main() exit_code = main()
assert exit_code == 0 assert exit_code == 0
@@ -437,8 +449,13 @@ class TestInstallAgentCLI:
def mock_get_agent_path(agent_name, project_root=None): def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills" return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path): with patch(
with patch("sys.argv", ["install_agent.py", str(self.skill_dir), "--agent", "claude", "--force"]): "skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
with patch(
"sys.argv",
["install_agent.py", str(self.skill_dir), "--agent", "claude", "--force"],
):
exit_code = main() exit_code = main()
assert exit_code == 0 assert exit_code == 0
@@ -454,8 +471,13 @@ class TestInstallAgentCLI:
def mock_get_agent_path(agent_name, project_root=None): def mock_get_agent_path(agent_name, project_root=None):
return Path(agent_tmpdir) / f".{agent_name}" / "skills" return Path(agent_tmpdir) / f".{agent_name}" / "skills"
with patch("skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path): with patch(
with patch("sys.argv", ["install_agent.py", str(self.skill_dir), "--agent", "all", "--force"]): "skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
):
with patch(
"sys.argv",
["install_agent.py", str(self.skill_dir), "--agent", "all", "--force"],
):
exit_code = main() exit_code = main()
assert exit_code == 0 assert exit_code == 0

View File

@@ -23,7 +23,9 @@ class TestInstallCLI(unittest.TestCase):
# Create parser like install_skill.py does # Create parser like install_skill.py does
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--config", required=True) parser.add_argument("--config", required=True)
parser.add_argument("--target", choices=["claude", "gemini", "openai", "markdown"], default="claude") parser.add_argument(
"--target", choices=["claude", "gemini", "openai", "markdown"], default="claude"
)
# Test that each platform is accepted # Test that each platform is accepted
for platform in ["claude", "gemini", "openai", "markdown"]: for platform in ["claude", "gemini", "openai", "markdown"]:
@@ -43,7 +45,9 @@ class TestInstallCLI(unittest.TestCase):
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--config", required=True) parser.add_argument("--config", required=True)
parser.add_argument("--target", choices=["claude", "gemini", "openai", "markdown"], default="claude") parser.add_argument(
"--target", choices=["claude", "gemini", "openai", "markdown"], default="claude"
)
# Should raise SystemExit for invalid target # Should raise SystemExit for invalid target
with self.assertRaises(SystemExit): with self.assertRaises(SystemExit):
@@ -62,7 +66,10 @@ class TestInstallToolMultiPlatform(unittest.IsolatedAsyncioTestCase):
for target in ["claude", "gemini", "openai"]: for target in ["claude", "gemini", "openai"]:
# Use dry_run=True which skips actual execution # Use dry_run=True which skips actual execution
# It will still show us the platform is being recognized # It will still show us the platform is being recognized
with patch("builtins.open", create=True) as mock_open, patch("json.load") as mock_json_load: with (
patch("builtins.open", create=True) as mock_open,
patch("json.load") as mock_json_load,
):
# Mock config file reading # Mock config file reading
mock_json_load.return_value = {"name": "test-skill"} mock_json_load.return_value = {"name": "test-skill"}
mock_file = MagicMock() mock_file = MagicMock()

View File

@@ -44,7 +44,9 @@ class TestInstallSkillValidation:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_validation_both_configs(self): async def test_validation_both_configs(self):
"""Test error when both config_name and config_path provided""" """Test error when both config_name and config_path provided"""
result = await install_skill_tool({"config_name": "react", "config_path": "configs/react.json"}) result = await install_skill_tool(
{"config_name": "react", "config_path": "configs/react.json"}
)
assert len(result) == 1 assert len(result) == 1
assert isinstance(result[0], TextContent) assert isinstance(result[0], TextContent)
@@ -114,7 +116,10 @@ class TestInstallSkillEnhancementMandatory:
# Verify enhancement phase is present # Verify enhancement phase is present
assert "AI Enhancement (MANDATORY)" in output assert "AI Enhancement (MANDATORY)" in output
assert "Enhancement is REQUIRED for quality (3/10→9/10 boost)" in output or "REQUIRED for quality" in output assert (
"Enhancement is REQUIRED for quality (3/10→9/10 boost)" in output
or "REQUIRED for quality" in output
)
# Verify it's not optional # Verify it's not optional
assert "MANDATORY" in output assert "MANDATORY" in output
@@ -134,13 +139,23 @@ class TestInstallSkillPhaseOrchestration:
@patch("builtins.open") @patch("builtins.open")
@patch("os.environ.get") @patch("os.environ.get")
async def test_full_workflow_with_fetch( async def test_full_workflow_with_fetch(
self, mock_env_get, mock_open, mock_upload, mock_package, mock_subprocess, mock_scrape, mock_fetch self,
mock_env_get,
mock_open,
mock_upload,
mock_package,
mock_subprocess,
mock_scrape,
mock_fetch,
): ):
"""Test complete workflow when config_name is provided""" """Test complete workflow when config_name is provided"""
# Mock fetch_config response # Mock fetch_config response
mock_fetch.return_value = [ mock_fetch.return_value = [
TextContent(type="text", text="✅ Config fetched successfully\n\nConfig saved to: configs/react.json") TextContent(
type="text",
text="✅ Config fetched successfully\n\nConfig saved to: configs/react.json",
)
] ]
# Mock config file read # Mock config file read
@@ -159,7 +174,9 @@ class TestInstallSkillPhaseOrchestration:
mock_subprocess.return_value = ("✅ Enhancement complete", "", 0) mock_subprocess.return_value = ("✅ Enhancement complete", "", 0)
# Mock package response # Mock package response
mock_package.return_value = [TextContent(type="text", text="✅ Package complete\n\nSaved to: output/react.zip")] mock_package.return_value = [
TextContent(type="text", text="✅ Package complete\n\nSaved to: output/react.zip")
]
# Mock upload response # Mock upload response
mock_upload.return_value = [TextContent(type="text", text="✅ Upload successful")] mock_upload.return_value = [TextContent(type="text", text="✅ Upload successful")]
@@ -220,7 +237,9 @@ class TestInstallSkillPhaseOrchestration:
mock_env_get.return_value = "" mock_env_get.return_value = ""
# Run the workflow # Run the workflow
result = await install_skill_tool({"config_path": "configs/custom.json", "auto_upload": True}) result = await install_skill_tool(
{"config_path": "configs/custom.json", "auto_upload": True}
)
output = result[0].text output = result[0].text
@@ -248,7 +267,9 @@ class TestInstallSkillErrorHandling:
"""Test handling of fetch phase failure""" """Test handling of fetch phase failure"""
# Mock fetch failure # Mock fetch failure
mock_fetch.return_value = [TextContent(type="text", text="❌ Failed to fetch config: Network error")] mock_fetch.return_value = [
TextContent(type="text", text="❌ Failed to fetch config: Network error")
]
result = await install_skill_tool({"config_name": "react"}) result = await install_skill_tool({"config_name": "react"})
@@ -271,7 +292,9 @@ class TestInstallSkillErrorHandling:
mock_open.return_value = mock_file mock_open.return_value = mock_file
# Mock scrape failure # Mock scrape failure
mock_scrape.return_value = [TextContent(type="text", text="❌ Scraping failed: Connection timeout")] mock_scrape.return_value = [
TextContent(type="text", text="❌ Scraping failed: Connection timeout")
]
result = await install_skill_tool({"config_path": "configs/test.json"}) result = await install_skill_tool({"config_path": "configs/test.json"})
@@ -317,7 +340,9 @@ class TestInstallSkillOptions:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_no_upload_option(self): async def test_no_upload_option(self):
"""Test that no_upload option skips upload phase""" """Test that no_upload option skips upload phase"""
result = await install_skill_tool({"config_name": "react", "auto_upload": False, "dry_run": True}) result = await install_skill_tool(
{"config_name": "react", "auto_upload": False, "dry_run": True}
)
output = result[0].text output = result[0].text
@@ -328,7 +353,9 @@ class TestInstallSkillOptions:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_unlimited_option(self): async def test_unlimited_option(self):
"""Test that unlimited option is passed to scraper""" """Test that unlimited option is passed to scraper"""
result = await install_skill_tool({"config_path": "configs/react.json", "unlimited": True, "dry_run": True}) result = await install_skill_tool(
{"config_path": "configs/react.json", "unlimited": True, "dry_run": True}
)
output = result[0].text output = result[0].text
@@ -338,7 +365,9 @@ class TestInstallSkillOptions:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_custom_destination(self): async def test_custom_destination(self):
"""Test custom destination directory""" """Test custom destination directory"""
result = await install_skill_tool({"config_name": "react", "destination": "/tmp/skills", "dry_run": True}) result = await install_skill_tool(
{"config_name": "react", "destination": "/tmp/skills", "dry_run": True}
)
output = result[0].text output = result[0].text

View File

@@ -95,7 +95,9 @@ class TestInstallSkillE2E:
return str(skill_dir) return str(skill_dir)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_e2e_with_config_path_no_upload(self, test_config_file, tmp_path, mock_scrape_output): async def test_e2e_with_config_path_no_upload(
self, test_config_file, tmp_path, mock_scrape_output
):
"""E2E test: config_path mode, no upload""" """E2E test: config_path mode, no upload"""
# Mock the subprocess calls for scraping and enhancement # Mock the subprocess calls for scraping and enhancement
@@ -106,7 +108,10 @@ class TestInstallSkillE2E:
): ):
# Mock scrape_docs to return success # Mock scrape_docs to return success
mock_scrape.return_value = [ mock_scrape.return_value = [
TextContent(type="text", text=f"✅ Scraping complete\n\nSkill built at: {mock_scrape_output}") TextContent(
type="text",
text=f"✅ Scraping complete\n\nSkill built at: {mock_scrape_output}",
)
] ]
# Mock enhancement subprocess (success) # Mock enhancement subprocess (success)
@@ -114,7 +119,9 @@ class TestInstallSkillE2E:
# Mock package_skill to return success # Mock package_skill to return success
zip_path = str(tmp_path / "output" / "test-e2e.zip") zip_path = str(tmp_path / "output" / "test-e2e.zip")
mock_package.return_value = [TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")] mock_package.return_value = [
TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")
]
# Run the tool # Run the tool
result = await install_skill_tool( result = await install_skill_tool(
@@ -167,7 +174,10 @@ class TestInstallSkillE2E:
# Mock fetch_config to return success # Mock fetch_config to return success
config_path = str(tmp_path / "configs" / "react.json") config_path = str(tmp_path / "configs" / "react.json")
mock_fetch.return_value = [ mock_fetch.return_value = [
TextContent(type="text", text=f"✅ Config fetched successfully\n\nConfig saved to: {config_path}") TextContent(
type="text",
text=f"✅ Config fetched successfully\n\nConfig saved to: {config_path}",
)
] ]
# Mock config file read # Mock config file read
@@ -178,7 +188,9 @@ class TestInstallSkillE2E:
# Mock scrape_docs # Mock scrape_docs
skill_dir = str(tmp_path / "output" / "react") skill_dir = str(tmp_path / "output" / "react")
mock_scrape.return_value = [ mock_scrape.return_value = [
TextContent(type="text", text=f"✅ Scraping complete\n\nSkill built at: {skill_dir}") TextContent(
type="text", text=f"✅ Scraping complete\n\nSkill built at: {skill_dir}"
)
] ]
# Mock enhancement # Mock enhancement
@@ -186,7 +198,9 @@ class TestInstallSkillE2E:
# Mock package # Mock package
zip_path = str(tmp_path / "output" / "react.zip") zip_path = str(tmp_path / "output" / "react.zip")
mock_package.return_value = [TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")] mock_package.return_value = [
TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")
]
# Mock env (no API key - should skip upload) # Mock env (no API key - should skip upload)
mock_env.return_value = "" mock_env.return_value = ""
@@ -222,7 +236,9 @@ class TestInstallSkillE2E:
async def test_e2e_dry_run_mode(self, test_config_file): async def test_e2e_dry_run_mode(self, test_config_file):
"""E2E test: dry-run mode (no actual execution)""" """E2E test: dry-run mode (no actual execution)"""
result = await install_skill_tool({"config_path": test_config_file, "auto_upload": False, "dry_run": True}) result = await install_skill_tool(
{"config_path": test_config_file, "auto_upload": False, "dry_run": True}
)
output = result[0].text output = result[0].text
@@ -245,9 +261,13 @@ class TestInstallSkillE2E:
with patch("skill_seekers.mcp.server.scrape_docs_tool") as mock_scrape: with patch("skill_seekers.mcp.server.scrape_docs_tool") as mock_scrape:
# Mock scrape failure # Mock scrape failure
mock_scrape.return_value = [TextContent(type="text", text="❌ Scraping failed: Network timeout")] mock_scrape.return_value = [
TextContent(type="text", text="❌ Scraping failed: Network timeout")
]
result = await install_skill_tool({"config_path": test_config_file, "auto_upload": False, "dry_run": False}) result = await install_skill_tool(
{"config_path": test_config_file, "auto_upload": False, "dry_run": False}
)
output = result[0].text output = result[0].text
@@ -256,7 +276,9 @@ class TestInstallSkillE2E:
assert "WORKFLOW COMPLETE" not in output assert "WORKFLOW COMPLETE" not in output
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_e2e_error_handling_enhancement_failure(self, test_config_file, mock_scrape_output): async def test_e2e_error_handling_enhancement_failure(
self, test_config_file, mock_scrape_output
):
"""E2E test: error handling when enhancement fails""" """E2E test: error handling when enhancement fails"""
with ( with (
@@ -265,13 +287,18 @@ class TestInstallSkillE2E:
): ):
# Mock successful scrape # Mock successful scrape
mock_scrape.return_value = [ mock_scrape.return_value = [
TextContent(type="text", text=f"✅ Scraping complete\n\nSkill built at: {mock_scrape_output}") TextContent(
type="text",
text=f"✅ Scraping complete\n\nSkill built at: {mock_scrape_output}",
)
] ]
# Mock enhancement failure # Mock enhancement failure
mock_enhance.return_value = ("", "Enhancement error: Claude not found", 1) mock_enhance.return_value = ("", "Enhancement error: Claude not found", 1)
result = await install_skill_tool({"config_path": test_config_file, "auto_upload": False, "dry_run": False}) result = await install_skill_tool(
{"config_path": test_config_file, "auto_upload": False, "dry_run": False}
)
output = result[0].text output = result[0].text
@@ -311,7 +338,9 @@ class TestInstallSkillCLI_E2E:
# Import and call the tool directly (more reliable than subprocess) # Import and call the tool directly (more reliable than subprocess)
from skill_seekers.mcp.server import install_skill_tool from skill_seekers.mcp.server import install_skill_tool
result = await install_skill_tool({"config_path": test_config_file, "dry_run": True, "auto_upload": False}) result = await install_skill_tool(
{"config_path": test_config_file, "dry_run": True, "auto_upload": False}
)
# Verify output # Verify output
output = result[0].text output = result[0].text
@@ -324,7 +353,9 @@ class TestInstallSkillCLI_E2E:
# Run CLI without config # Run CLI without config
result = subprocess.run( result = subprocess.run(
[sys.executable, "-m", "skill_seekers.cli.install_skill"], capture_output=True, text=True [sys.executable, "-m", "skill_seekers.cli.install_skill"],
capture_output=True,
text=True,
) )
# Should fail # Should fail
@@ -337,7 +368,9 @@ class TestInstallSkillCLI_E2E:
"""E2E test: CLI help command""" """E2E test: CLI help command"""
result = subprocess.run( result = subprocess.run(
[sys.executable, "-m", "skill_seekers.cli.install_skill", "--help"], capture_output=True, text=True [sys.executable, "-m", "skill_seekers.cli.install_skill", "--help"],
capture_output=True,
text=True,
) )
# Should succeed # Should succeed
@@ -354,7 +387,9 @@ class TestInstallSkillCLI_E2E:
@patch("skill_seekers.mcp.server.scrape_docs_tool") @patch("skill_seekers.mcp.server.scrape_docs_tool")
@patch("skill_seekers.mcp.server.run_subprocess_with_streaming") @patch("skill_seekers.mcp.server.run_subprocess_with_streaming")
@patch("skill_seekers.mcp.server.package_skill_tool") @patch("skill_seekers.mcp.server.package_skill_tool")
async def test_cli_full_workflow_mocked(self, mock_package, mock_enhance, mock_scrape, test_config_file, tmp_path): async def test_cli_full_workflow_mocked(
self, mock_package, mock_enhance, mock_scrape, test_config_file, tmp_path
):
"""E2E test: Full CLI workflow with mocked phases (via direct call)""" """E2E test: Full CLI workflow with mocked phases (via direct call)"""
# Setup mocks # Setup mocks
@@ -366,7 +401,9 @@ class TestInstallSkillCLI_E2E:
mock_enhance.return_value = ("✅ Enhancement complete", "", 0) mock_enhance.return_value = ("✅ Enhancement complete", "", 0)
zip_path = str(tmp_path / "output" / "test-cli-e2e.zip") zip_path = str(tmp_path / "output" / "test-cli-e2e.zip")
mock_package.return_value = [TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")] mock_package.return_value = [
TextContent(type="text", text=f"✅ Package complete\n\nSaved to: {zip_path}")
]
# Call the tool directly # Call the tool directly
from skill_seekers.mcp.server import install_skill_tool from skill_seekers.mcp.server import install_skill_tool

View File

@@ -172,7 +172,9 @@ class TestRealConfigFiles(unittest.TestCase):
if os.path.exists(config_path): if os.path.exists(config_path):
config = load_config(config_path) config = load_config(config_path)
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertEqual(len(errors), 0, f"FastAPI config should be valid, got errors: {errors}") self.assertEqual(
len(errors), 0, f"FastAPI config should be valid, got errors: {errors}"
)
def test_steam_economy_config(self): def test_steam_economy_config(self):
"""Test Steam Economy config is valid""" """Test Steam Economy config is valid"""
@@ -180,7 +182,9 @@ class TestRealConfigFiles(unittest.TestCase):
if os.path.exists(config_path): if os.path.exists(config_path):
config = load_config(config_path) config = load_config(config_path)
errors, _ = validate_config(config) errors, _ = validate_config(config)
self.assertEqual(len(errors), 0, f"Steam Economy config should be valid, got errors: {errors}") self.assertEqual(
len(errors), 0, f"Steam Economy config should be valid, got errors: {errors}"
)
class TestURLProcessing(unittest.TestCase): class TestURLProcessing(unittest.TestCase):
@@ -221,7 +225,11 @@ class TestURLProcessing(unittest.TestCase):
config = { config = {
"name": "test", "name": "test",
"base_url": "https://example.com/", "base_url": "https://example.com/",
"start_urls": ["https://example.com/guide/", "https://example.com/api/", "https://example.com/tutorial/"], "start_urls": [
"https://example.com/guide/",
"https://example.com/api/",
"https://example.com/tutorial/",
],
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"}, "selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"},
"rate_limit": 0.1, "rate_limit": 0.1,
"max_pages": 10, "max_pages": 10,
@@ -423,14 +431,20 @@ app.use('*', cors())
# Verify llms.txt was detected # Verify llms.txt was detected
self.assertTrue(scraper.llms_txt_detected, "llms.txt should be detected") self.assertTrue(scraper.llms_txt_detected, "llms.txt should be detected")
self.assertEqual(scraper.llms_txt_variant, "explicit", "Should use explicit variant from config") self.assertEqual(
scraper.llms_txt_variant, "explicit", "Should use explicit variant from config"
)
# Verify pages were parsed # Verify pages were parsed
self.assertGreater(len(scraper.pages), 0, "Should have parsed pages from llms.txt") self.assertGreater(len(scraper.pages), 0, "Should have parsed pages from llms.txt")
# Verify page structure # Verify page structure
self.assertTrue(all("title" in page for page in scraper.pages), "All pages should have titles") self.assertTrue(
self.assertTrue(all("content" in page for page in scraper.pages), "All pages should have content") all("title" in page for page in scraper.pages), "All pages should have titles"
)
self.assertTrue(
all("content" in page for page in scraper.pages), "All pages should have content"
)
self.assertTrue( self.assertTrue(
any(len(page.get("code_samples", [])) > 0 for page in scraper.pages), any(len(page.get("code_samples", [])) > 0 for page in scraper.pages),
"At least one page should have code samples", "At least one page should have code samples",

View File

@@ -51,7 +51,9 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase):
mock_content.type = "file" mock_content.type = "file"
mock_content.encoding = "none" # This is what GitHub API returns for large files mock_content.encoding = "none" # This is what GitHub API returns for large files
mock_content.size = 1388271 mock_content.size = 1388271
mock_content.download_url = "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md" mock_content.download_url = (
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
)
with patch("skill_seekers.cli.github_scraper.Github"): with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config) scraper = self.GitHubScraper(config)
@@ -109,7 +111,9 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
def test_github_command_has_enhancement_flags(self): def test_github_command_has_enhancement_flags(self):
"""E2E: Verify --enhance-local flag exists in github command help""" """E2E: Verify --enhance-local flag exists in github command help"""
result = subprocess.run(["skill-seekers", "github", "--help"], capture_output=True, text=True) result = subprocess.run(
["skill-seekers", "github", "--help"], capture_output=True, text=True
)
# VERIFY: Command succeeds # VERIFY: Command succeeds
self.assertEqual(result.returncode, 0, "github --help should succeed") self.assertEqual(result.returncode, 0, "github --help should succeed")
@@ -148,9 +152,20 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
from skill_seekers.cli import main from skill_seekers.cli import main
# Mock sys.argv to simulate CLI call # Mock sys.argv to simulate CLI call
test_args = ["skill-seekers", "github", "--repo", "test/test", "--name", "test", "--enhance-local"] test_args = [
"skill-seekers",
"github",
"--repo",
"test/test",
"--name",
"test",
"--enhance-local",
]
with patch("sys.argv", test_args), patch("skill_seekers.cli.github_scraper.main") as mock_github_main: with (
patch("sys.argv", test_args),
patch("skill_seekers.cli.github_scraper.main") as mock_github_main,
):
mock_github_main.return_value = 0 mock_github_main.return_value = 0
# Call main dispatcher # Call main dispatcher
@@ -165,9 +180,12 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
# VERIFY: sys.argv contains --enhance-local flag # VERIFY: sys.argv contains --enhance-local flag
# (main.py should have added it before calling github_scraper) # (main.py should have added it before calling github_scraper)
called_with_enhance = any("--enhance-local" in str(call) for call in mock_github_main.call_args_list) called_with_enhance = any(
"--enhance-local" in str(call) for call in mock_github_main.call_args_list
)
self.assertTrue( self.assertTrue(
called_with_enhance or "--enhance-local" in sys.argv, "Flag should be forwarded to github_scraper" called_with_enhance or "--enhance-local" in sys.argv,
"Flag should be forwarded to github_scraper",
) )
@@ -203,7 +221,9 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
custom_url = "http://localhost:3000" custom_url = "http://localhost:3000"
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url}), patch.dict(
os.environ, {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url}
),
patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic, patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
): ):
# Create enhancer # Create enhancer
@@ -213,7 +233,11 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
mock_anthropic.assert_called_once() mock_anthropic.assert_called_once()
call_kwargs = mock_anthropic.call_args[1] call_kwargs = mock_anthropic.call_args[1]
self.assertIn("base_url", call_kwargs, "base_url should be passed") self.assertIn("base_url", call_kwargs, "base_url should be passed")
self.assertEqual(call_kwargs["base_url"], custom_url, "base_url should match ANTHROPIC_BASE_URL env var") self.assertEqual(
call_kwargs["base_url"],
custom_url,
"base_url should match ANTHROPIC_BASE_URL env var",
)
def test_anthropic_auth_token_support(self): def test_anthropic_auth_token_support(self):
"""E2E: Verify ANTHROPIC_AUTH_TOKEN is accepted as alternative to ANTHROPIC_API_KEY""" """E2E: Verify ANTHROPIC_AUTH_TOKEN is accepted as alternative to ANTHROPIC_API_KEY"""
@@ -234,13 +258,17 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
# VERIFY: api_key set to ANTHROPIC_AUTH_TOKEN value # VERIFY: api_key set to ANTHROPIC_AUTH_TOKEN value
self.assertEqual( self.assertEqual(
enhancer.api_key, custom_token, "Should use ANTHROPIC_AUTH_TOKEN when ANTHROPIC_API_KEY not set" enhancer.api_key,
custom_token,
"Should use ANTHROPIC_AUTH_TOKEN when ANTHROPIC_API_KEY not set",
) )
# VERIFY: Anthropic client initialized with correct key # VERIFY: Anthropic client initialized with correct key
mock_anthropic.assert_called_once() mock_anthropic.assert_called_once()
call_kwargs = mock_anthropic.call_args[1] call_kwargs = mock_anthropic.call_args[1]
self.assertEqual(call_kwargs["api_key"], custom_token, "api_key should match ANTHROPIC_AUTH_TOKEN") self.assertEqual(
call_kwargs["api_key"], custom_token, "api_key should match ANTHROPIC_AUTH_TOKEN"
)
def test_thinking_block_handling(self): def test_thinking_block_handling(self):
"""E2E: Verify ThinkingBlock doesn't cause .text AttributeError""" """E2E: Verify ThinkingBlock doesn't cause .text AttributeError"""
@@ -284,7 +312,11 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
# VERIFY: Should find text from TextBlock, ignore ThinkingBlock # VERIFY: Should find text from TextBlock, ignore ThinkingBlock
self.assertIsNotNone(result, "Should return enhanced content") self.assertIsNotNone(result, "Should return enhanced content")
self.assertEqual(result, "# Enhanced SKILL.md\n\nContent here", "Should extract text from TextBlock") self.assertEqual(
result,
"# Enhanced SKILL.md\n\nContent here",
"Should extract text from TextBlock",
)
class TestIssue219IntegrationAll(unittest.TestCase): class TestIssue219IntegrationAll(unittest.TestCase):
@@ -297,7 +329,9 @@ class TestIssue219IntegrationAll(unittest.TestCase):
# 2. Large files are downloaded # 2. Large files are downloaded
# 3. Custom API endpoints work # 3. Custom API endpoints work
result = subprocess.run(["skill-seekers", "github", "--help"], capture_output=True, text=True) result = subprocess.run(
["skill-seekers", "github", "--help"], capture_output=True, text=True
)
# All flags present # All flags present
self.assertIn("--enhance", result.stdout) self.assertIn("--enhance", result.stdout)

View File

@@ -48,7 +48,9 @@ def test_url_parsing_with_complex_paths():
assert variants is not None assert variants is not None
assert variants["url"] == "https://example.com/llms-full.txt" assert variants["url"] == "https://example.com/llms-full.txt"
mock_head.assert_called_with("https://example.com/llms-full.txt", timeout=5, allow_redirects=True) mock_head.assert_called_with(
"https://example.com/llms-full.txt", timeout=5, allow_redirects=True
)
def test_detect_all_variants(): def test_detect_all_variants():

View File

@@ -133,7 +133,10 @@ def test_custom_max_retries():
"""Test custom max_retries parameter""" """Test custom max_retries parameter"""
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5) downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5)
with patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get, patch("time.sleep"): with (
patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get,
patch("time.sleep"),
):
content = downloader.download() content = downloader.download()
assert content is None assert content is None
@@ -189,7 +192,9 @@ def test_is_markdown_rejects_html_doctype():
"""Test that HTML with DOCTYPE is rejected (prevents redirect trap)""" """Test that HTML with DOCTYPE is rejected (prevents redirect trap)"""
downloader = LlmsTxtDownloader("https://example.com/llms.txt") downloader = LlmsTxtDownloader("https://example.com/llms.txt")
html = "<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>" html = (
"<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>"
)
assert not downloader._is_markdown(html) assert not downloader._is_markdown(html)
# Test case-insensitive # Test case-insensitive

View File

@@ -93,7 +93,9 @@ plain code without language
- [HTML Page](./page.html) - [HTML Page](./page.html)
- [External](https://google.com) - [External](https://google.com)
""" """
result = self.converter._extract_markdown_content(content, "https://example.com/docs/test.md") result = self.converter._extract_markdown_content(
content, "https://example.com/docs/test.md"
)
# Should only include .md links # Should only include .md links
md_links = [l for l in result["links"] if ".md" in l] md_links = [l for l in result["links"] if ".md" in l]
self.assertEqual(len(md_links), len(result["links"])) self.assertEqual(len(md_links), len(result["links"]))
@@ -115,7 +117,9 @@ Another paragraph that should be included in the final content output.
def test_detect_html_in_md_url(self): def test_detect_html_in_md_url(self):
"""Test that HTML content is detected when .md URL returns HTML.""" """Test that HTML content is detected when .md URL returns HTML."""
html_content = "<!DOCTYPE html><html><head><title>Page</title></head><body><h1>Hello</h1></body></html>" html_content = "<!DOCTYPE html><html><head><title>Page</title></head><body><h1>Hello</h1></body></html>"
result = self.converter._extract_markdown_content(html_content, "https://example.com/test.md") result = self.converter._extract_markdown_content(
html_content, "https://example.com/test.md"
)
self.assertEqual(result["title"], "Page") self.assertEqual(result["title"], "Page")

View File

@@ -67,7 +67,10 @@ def sample_config(temp_dirs):
"base_url": "https://test-framework.dev/", "base_url": "https://test-framework.dev/",
"selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"}, "selectors": {"main_content": "article", "title": "h1", "code_blocks": "pre"},
"url_patterns": {"include": ["/docs/"], "exclude": ["/blog/", "/search/"]}, "url_patterns": {"include": ["/docs/"], "exclude": ["/blog/", "/search/"]},
"categories": {"getting_started": ["introduction", "getting-started"], "api": ["api", "reference"]}, "categories": {
"getting_started": ["introduction", "getting-started"],
"api": ["api", "reference"],
},
"rate_limit": 0.5, "rate_limit": 0.5,
"max_pages": 100, "max_pages": 100,
} }
@@ -85,7 +88,12 @@ def unified_config(temp_dirs):
"description": "Test unified scraping", "description": "Test unified scraping",
"merge_mode": "rule-based", "merge_mode": "rule-based",
"sources": [ "sources": [
{"type": "documentation", "base_url": "https://example.com/docs/", "extract_api": True, "max_pages": 10}, {
"type": "documentation",
"base_url": "https://example.com/docs/",
"extract_api": True,
"max_pages": 10,
},
{"type": "github", "repo": "test/repo", "extract_readme": True}, {"type": "github", "repo": "test/repo", "extract_readme": True},
], ],
} }
@@ -166,7 +174,11 @@ class TestConfigTools:
"""Test basic config generation.""" """Test basic config generation."""
monkeypatch.chdir(temp_dirs["base"]) monkeypatch.chdir(temp_dirs["base"])
args = {"name": "my-framework", "url": "https://my-framework.dev/", "description": "My framework skill"} args = {
"name": "my-framework",
"url": "https://my-framework.dev/",
"description": "My framework skill",
}
result = await server_fastmcp.generate_config(**args) result = await server_fastmcp.generate_config(**args)
@@ -232,7 +244,9 @@ class TestConfigTools:
async def test_validate_config_missing_file(self, temp_dirs): async def test_validate_config_missing_file(self, temp_dirs):
"""Test validating a non-existent config file.""" """Test validating a non-existent config file."""
result = await server_fastmcp.validate_config(config_path=str(temp_dirs["config"] / "nonexistent.json")) result = await server_fastmcp.validate_config(
config_path=str(temp_dirs["config"] / "nonexistent.json")
)
assert isinstance(result, str) assert isinstance(result, str)
# Should indicate error # Should indicate error
@@ -252,7 +266,9 @@ class TestScrapingTools:
async def test_estimate_pages_basic(self, sample_config): async def test_estimate_pages_basic(self, sample_config):
"""Test basic page estimation.""" """Test basic page estimation."""
with patch("subprocess.run") as mock_run: with patch("subprocess.run") as mock_run:
mock_run.return_value = Mock(returncode=0, stdout="Estimated pages: 150\nRecommended max_pages: 200") mock_run.return_value = Mock(
returncode=0, stdout="Estimated pages: 150\nRecommended max_pages: 200"
)
result = await server_fastmcp.estimate_pages(config_path=str(sample_config)) result = await server_fastmcp.estimate_pages(config_path=str(sample_config))
@@ -266,7 +282,9 @@ class TestScrapingTools:
async def test_estimate_pages_custom_discovery(self, sample_config): async def test_estimate_pages_custom_discovery(self, sample_config):
"""Test estimation with custom max_discovery.""" """Test estimation with custom max_discovery."""
result = await server_fastmcp.estimate_pages(config_path=str(sample_config), max_discovery=500) result = await server_fastmcp.estimate_pages(
config_path=str(sample_config), max_discovery=500
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -281,7 +299,9 @@ class TestScrapingTools:
async def test_scrape_docs_with_enhancement(self, sample_config): async def test_scrape_docs_with_enhancement(self, sample_config):
"""Test scraping with local enhancement.""" """Test scraping with local enhancement."""
result = await server_fastmcp.scrape_docs(config_path=str(sample_config), enhance_local=True, dry_run=True) result = await server_fastmcp.scrape_docs(
config_path=str(sample_config), enhance_local=True, dry_run=True
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -310,7 +330,9 @@ class TestScrapingTools:
with patch("subprocess.run") as mock_run: with patch("subprocess.run") as mock_run:
mock_run.return_value = Mock(returncode=0, stdout="GitHub scraping completed") mock_run.return_value = Mock(returncode=0, stdout="GitHub scraping completed")
result = await server_fastmcp.scrape_github(repo="facebook/react", name="react-github-test") result = await server_fastmcp.scrape_github(
repo="facebook/react", name="react-github-test"
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -325,7 +347,12 @@ class TestScrapingTools:
async def test_scrape_github_options(self): async def test_scrape_github_options(self):
"""Test GitHub scraping with various options.""" """Test GitHub scraping with various options."""
result = await server_fastmcp.scrape_github( result = await server_fastmcp.scrape_github(
repo="test/repo", no_issues=True, no_changelog=True, no_releases=True, max_issues=50, scrape_only=True repo="test/repo",
no_issues=True,
no_changelog=True,
no_releases=True,
max_issues=50,
scrape_only=True,
) )
assert isinstance(result, str) assert isinstance(result, str)
@@ -333,7 +360,11 @@ class TestScrapingTools:
async def test_scrape_pdf_basic(self, temp_dirs): async def test_scrape_pdf_basic(self, temp_dirs):
"""Test basic PDF scraping.""" """Test basic PDF scraping."""
# Create a dummy PDF config # Create a dummy PDF config
pdf_config = {"name": "test-pdf", "pdf_path": "/path/to/test.pdf", "description": "Test PDF skill"} pdf_config = {
"name": "test-pdf",
"pdf_path": "/path/to/test.pdf",
"description": "Test PDF skill",
}
config_path = temp_dirs["config"] / "test-pdf.json" config_path = temp_dirs["config"] / "test-pdf.json"
config_path.write_text(json.dumps(pdf_config)) config_path.write_text(json.dumps(pdf_config))
@@ -343,7 +374,9 @@ class TestScrapingTools:
async def test_scrape_pdf_direct_path(self): async def test_scrape_pdf_direct_path(self):
"""Test PDF scraping with direct path.""" """Test PDF scraping with direct path."""
result = await server_fastmcp.scrape_pdf(pdf_path="/path/to/manual.pdf", name="manual-skill") result = await server_fastmcp.scrape_pdf(
pdf_path="/path/to/manual.pdf", name="manual-skill"
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -428,7 +461,9 @@ class TestPackagingTools:
async def test_upload_skill_missing_file(self, temp_dirs): async def test_upload_skill_missing_file(self, temp_dirs):
"""Test upload with missing file.""" """Test upload with missing file."""
result = await server_fastmcp.upload_skill(skill_zip=str(temp_dirs["output"] / "nonexistent.zip")) result = await server_fastmcp.upload_skill(
skill_zip=str(temp_dirs["output"] / "nonexistent.zip")
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -438,7 +473,9 @@ class TestPackagingTools:
with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch: with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch:
mock_fetch.return_value = [Mock(text="Config fetched")] mock_fetch.return_value = [Mock(text="Config fetched")]
result = await server_fastmcp.install_skill(config_name="react", destination="output", dry_run=True) result = await server_fastmcp.install_skill(
config_name="react", destination="output", dry_run=True
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -458,7 +495,9 @@ class TestPackagingTools:
with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch: with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch:
mock_fetch.return_value = [Mock(text="Config fetched")] mock_fetch.return_value = [Mock(text="Config fetched")]
result = await server_fastmcp.install_skill(config_name="react", unlimited=True, dry_run=True) result = await server_fastmcp.install_skill(
config_name="react", unlimited=True, dry_run=True
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -467,7 +506,9 @@ class TestPackagingTools:
with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch: with patch("skill_seekers.mcp.tools.source_tools.fetch_config_tool") as mock_fetch:
mock_fetch.return_value = [Mock(text="Config fetched")] mock_fetch.return_value = [Mock(text="Config fetched")]
result = await server_fastmcp.install_skill(config_name="react", auto_upload=False, dry_run=True) result = await server_fastmcp.install_skill(
config_name="react", auto_upload=False, dry_run=True
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -484,7 +525,9 @@ class TestSplittingTools:
async def test_split_config_auto_strategy(self, sample_config): async def test_split_config_auto_strategy(self, sample_config):
"""Test config splitting with auto strategy.""" """Test config splitting with auto strategy."""
result = await server_fastmcp.split_config(config_path=str(sample_config), strategy="auto", dry_run=True) result = await server_fastmcp.split_config(
config_path=str(sample_config), strategy="auto", dry_run=True
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -510,7 +553,9 @@ class TestSplittingTools:
(temp_dirs["config"] / "godot-scripting.json").write_text("{}") (temp_dirs["config"] / "godot-scripting.json").write_text("{}")
(temp_dirs["config"] / "godot-physics.json").write_text("{}") (temp_dirs["config"] / "godot-physics.json").write_text("{}")
result = await server_fastmcp.generate_router(config_pattern=str(temp_dirs["config"] / "godot-*.json")) result = await server_fastmcp.generate_router(
config_pattern=str(temp_dirs["config"] / "godot-*.json")
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -552,7 +597,9 @@ class TestSourceTools:
async def test_fetch_config_download_api(self, temp_dirs): async def test_fetch_config_download_api(self, temp_dirs):
"""Test downloading specific config from API.""" """Test downloading specific config from API."""
result = await server_fastmcp.fetch_config(config_name="react", destination=str(temp_dirs["config"])) result = await server_fastmcp.fetch_config(
config_name="react", destination=str(temp_dirs["config"])
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -565,7 +612,9 @@ class TestSourceTools:
async def test_fetch_config_from_git_url(self, temp_dirs): async def test_fetch_config_from_git_url(self, temp_dirs):
"""Test fetching config from git URL.""" """Test fetching config from git URL."""
result = await server_fastmcp.fetch_config( result = await server_fastmcp.fetch_config(
config_name="react", git_url="https://github.com/myorg/configs.git", destination=str(temp_dirs["config"]) config_name="react",
git_url="https://github.com/myorg/configs.git",
destination=str(temp_dirs["config"]),
) )
assert isinstance(result, str) assert isinstance(result, str)
@@ -612,13 +661,17 @@ class TestSourceTools:
"""Test submitting config as JSON string.""" """Test submitting config as JSON string."""
config_json = json.dumps({"name": "my-framework", "base_url": "https://my-framework.dev/"}) config_json = json.dumps({"name": "my-framework", "base_url": "https://my-framework.dev/"})
result = await server_fastmcp.submit_config(config_json=config_json, testing_notes="Works great!") result = await server_fastmcp.submit_config(
config_json=config_json, testing_notes="Works great!"
)
assert isinstance(result, str) assert isinstance(result, str)
async def test_add_config_source_basic(self): async def test_add_config_source_basic(self):
"""Test adding a config source.""" """Test adding a config source."""
result = await server_fastmcp.add_config_source(name="team", git_url="https://github.com/myorg/configs.git") result = await server_fastmcp.add_config_source(
name="team", git_url="https://github.com/myorg/configs.git"
)
assert isinstance(result, str) assert isinstance(result, str)
@@ -706,7 +759,9 @@ class TestFastMCPIntegration:
async def test_workflow_split_router(self, sample_config, temp_dirs): async def test_workflow_split_router(self, sample_config, temp_dirs):
"""Test workflow: split config → generate router.""" """Test workflow: split config → generate router."""
# Step 1: Split config # Step 1: Split config
result1 = await server_fastmcp.split_config(config_path=str(sample_config), strategy="category", dry_run=True) result1 = await server_fastmcp.split_config(
config_path=str(sample_config), strategy="category", dry_run=True
)
assert isinstance(result1, str) assert isinstance(result1, str)
# Step 2: Generate router # Step 2: Generate router

View File

@@ -42,7 +42,11 @@ def mock_git_repo(temp_dirs):
(repo_path / ".git").mkdir() (repo_path / ".git").mkdir()
# Create sample config files # Create sample config files
react_config = {"name": "react", "description": "React framework", "base_url": "https://react.dev/"} react_config = {
"name": "react",
"description": "React framework",
"base_url": "https://react.dev/",
}
(repo_path / "react.json").write_text(json.dumps(react_config, indent=2)) (repo_path / "react.json").write_text(json.dumps(react_config, indent=2))
vue_config = {"name": "vue", "description": "Vue framework", "base_url": "https://vuejs.org/"} vue_config = {"name": "vue", "description": "Vue framework", "base_url": "https://vuejs.org/"}
@@ -65,8 +69,18 @@ class TestFetchConfigModes:
mock_response = MagicMock() mock_response = MagicMock()
mock_response.json.return_value = { mock_response.json.return_value = {
"configs": [ "configs": [
{"name": "react", "category": "web-frameworks", "description": "React framework", "type": "single"}, {
{"name": "vue", "category": "web-frameworks", "description": "Vue framework", "type": "single"}, "name": "react",
"category": "web-frameworks",
"description": "React framework",
"type": "single",
},
{
"name": "vue",
"category": "web-frameworks",
"description": "Vue framework",
"type": "single",
},
], ],
"total": 2, "total": 2,
} }
@@ -94,7 +108,10 @@ class TestFetchConfigModes:
} }
mock_download_response = MagicMock() mock_download_response = MagicMock()
mock_download_response.json.return_value = {"name": "react", "base_url": "https://react.dev/"} mock_download_response.json.return_value = {
"name": "react",
"base_url": "https://react.dev/",
}
mock_client_instance = mock_client.return_value.__aenter__.return_value mock_client_instance = mock_client.return_value.__aenter__.return_value
mock_client_instance.get.side_effect = [mock_detail_response, mock_download_response] mock_client_instance.get.side_effect = [mock_detail_response, mock_download_response]
@@ -149,7 +166,9 @@ class TestFetchConfigModes:
@patch("skill_seekers.mcp.server.GitConfigRepo") @patch("skill_seekers.mcp.server.GitConfigRepo")
@patch("skill_seekers.mcp.server.SourceManager") @patch("skill_seekers.mcp.server.SourceManager")
async def test_fetch_config_source_mode(self, mock_source_manager_class, mock_git_repo_class, temp_dirs): async def test_fetch_config_source_mode(
self, mock_source_manager_class, mock_git_repo_class, temp_dirs
):
"""Test Source mode - using named source from registry.""" """Test Source mode - using named source from registry."""
from skill_seekers.mcp.server import fetch_config_tool from skill_seekers.mcp.server import fetch_config_tool
@@ -491,7 +510,9 @@ class TestCompleteWorkflow:
} }
mock_sm_class.return_value = mock_sm mock_sm_class.return_value = mock_sm
add_result = await add_config_source_tool({"name": "team", "git_url": "https://github.com/myorg/configs.git"}) add_result = await add_config_source_tool(
{"name": "team", "git_url": "https://github.com/myorg/configs.git"}
)
assert "" in add_result[0].text assert "" in add_result[0].text
# Step 2: Fetch config from source # Step 2: Fetch config from source

View File

@@ -119,7 +119,11 @@ class TestGenerateConfigTool(unittest.IsolatedAsyncioTestCase):
async def test_generate_config_basic(self): async def test_generate_config_basic(self):
"""Test basic config generation""" """Test basic config generation"""
args = {"name": "test-framework", "url": "https://test-framework.dev/", "description": "Test framework skill"} args = {
"name": "test-framework",
"url": "https://test-framework.dev/",
"description": "Test framework skill",
}
result = await skill_seeker_server.generate_config_tool(args) result = await skill_seeker_server.generate_config_tool(args)
@@ -564,7 +568,9 @@ class TestSubmitConfigTool(unittest.IsolatedAsyncioTestCase):
async def test_submit_config_requires_token(self): async def test_submit_config_requires_token(self):
"""Should error without GitHub token""" """Should error without GitHub token"""
args = {"config_json": '{"name": "test", "description": "Test", "base_url": "https://example.com"}'} args = {
"config_json": '{"name": "test", "description": "Test", "base_url": "https://example.com"}'
}
result = await skill_seeker_server.submit_config_tool(args) result = await skill_seeker_server.submit_config_tool(args)
self.assertIn("GitHub token required", result[0].text) self.assertIn("GitHub token required", result[0].text)
@@ -577,7 +583,9 @@ class TestSubmitConfigTool(unittest.IsolatedAsyncioTestCase):
result = await skill_seeker_server.submit_config_tool(args) result = await skill_seeker_server.submit_config_tool(args)
self.assertIn("validation failed", result[0].text.lower()) self.assertIn("validation failed", result[0].text.lower())
# ConfigValidator detects missing config type (base_url/repo/pdf) # ConfigValidator detects missing config type (base_url/repo/pdf)
self.assertTrue("cannot detect" in result[0].text.lower() or "missing" in result[0].text.lower()) self.assertTrue(
"cannot detect" in result[0].text.lower() or "missing" in result[0].text.lower()
)
async def test_submit_config_validates_name_format(self): async def test_submit_config_validates_name_format(self):
"""Should reject invalid name characters""" """Should reject invalid name characters"""
@@ -649,7 +657,9 @@ class TestSubmitConfigTool(unittest.IsolatedAsyncioTestCase):
async def test_submit_config_from_file_path(self): async def test_submit_config_from_file_path(self):
"""Should accept config_path parameter""" """Should accept config_path parameter"""
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump({"name": "testfile", "description": "From file", "base_url": "https://test.com/"}, f) json.dump(
{"name": "testfile", "description": "From file", "base_url": "https://test.com/"}, f
)
temp_path = f.name temp_path = f.name
try: try:

View File

@@ -24,11 +24,29 @@ class TestIssueCategorization:
def test_categorize_issues_basic(self): def test_categorize_issues_basic(self):
"""Test basic issue categorization.""" """Test basic issue categorization."""
problems = [ problems = [
{"title": "OAuth setup fails", "labels": ["bug", "oauth"], "number": 1, "state": "open", "comments": 10}, {
{"title": "Testing framework issue", "labels": ["testing"], "number": 2, "state": "open", "comments": 5}, "title": "OAuth setup fails",
"labels": ["bug", "oauth"],
"number": 1,
"state": "open",
"comments": 10,
},
{
"title": "Testing framework issue",
"labels": ["testing"],
"number": 2,
"state": "open",
"comments": 5,
},
] ]
solutions = [ solutions = [
{"title": "Fixed OAuth redirect", "labels": ["oauth"], "number": 3, "state": "closed", "comments": 3} {
"title": "Fixed OAuth redirect",
"labels": ["oauth"],
"number": 3,
"state": "closed",
"comments": 3,
}
] ]
topics = ["oauth", "testing", "async"] topics = ["oauth", "testing", "async"]
@@ -43,7 +61,13 @@ class TestIssueCategorization:
def test_categorize_issues_keyword_matching(self): def test_categorize_issues_keyword_matching(self):
"""Test keyword matching in titles and labels.""" """Test keyword matching in titles and labels."""
problems = [ problems = [
{"title": "Database connection timeout", "labels": ["db"], "number": 1, "state": "open", "comments": 7} {
"title": "Database connection timeout",
"labels": ["db"],
"number": 1,
"state": "open",
"comments": 7,
}
] ]
solutions = [] solutions = []
@@ -57,7 +81,13 @@ class TestIssueCategorization:
def test_categorize_issues_multi_keyword_topic(self): def test_categorize_issues_multi_keyword_topic(self):
"""Test topics with multiple keywords.""" """Test topics with multiple keywords."""
problems = [ problems = [
{"title": "Async API call fails", "labels": ["async", "api"], "number": 1, "state": "open", "comments": 8} {
"title": "Async API call fails",
"labels": ["async", "api"],
"number": 1,
"state": "open",
"comments": 8,
}
] ]
solutions = [] solutions = []
@@ -71,7 +101,15 @@ class TestIssueCategorization:
def test_categorize_issues_no_match_goes_to_other(self): def test_categorize_issues_no_match_goes_to_other(self):
"""Test that unmatched issues go to 'other' category.""" """Test that unmatched issues go to 'other' category."""
problems = [{"title": "Random issue", "labels": ["misc"], "number": 1, "state": "open", "comments": 5}] problems = [
{
"title": "Random issue",
"labels": ["misc"],
"number": 1,
"state": "open",
"comments": 5,
}
]
solutions = [] solutions = []
topics = ["oauth", "testing"] topics = ["oauth", "testing"]
@@ -94,7 +132,10 @@ class TestHybridContent:
def test_generate_hybrid_content_basic(self): def test_generate_hybrid_content_basic(self):
"""Test basic hybrid content generation.""" """Test basic hybrid content generation."""
api_data = {"apis": {"oauth_login": {"name": "oauth_login", "status": "matched"}}, "summary": {"total_apis": 1}} api_data = {
"apis": {"oauth_login": {"name": "oauth_login", "status": "matched"}},
"summary": {"total_apis": 1},
}
github_docs = { github_docs = {
"readme": "# Project README", "readme": "# Project README",
@@ -103,12 +144,29 @@ class TestHybridContent:
} }
github_insights = { github_insights = {
"metadata": {"stars": 1234, "forks": 56, "language": "Python", "description": "Test project"}, "metadata": {
"stars": 1234,
"forks": 56,
"language": "Python",
"description": "Test project",
},
"common_problems": [ "common_problems": [
{"title": "OAuth fails", "number": 42, "state": "open", "comments": 10, "labels": ["bug"]} {
"title": "OAuth fails",
"number": 42,
"state": "open",
"comments": 10,
"labels": ["bug"],
}
], ],
"known_solutions": [ "known_solutions": [
{"title": "Fixed OAuth", "number": 35, "state": "closed", "comments": 5, "labels": ["bug"]} {
"title": "Fixed OAuth",
"number": 35,
"state": "closed",
"comments": 5,
"labels": ["bug"],
}
], ],
"top_labels": [{"label": "bug", "count": 10}, {"label": "enhancement", "count": 5}], "top_labels": [{"label": "bug", "count": 10}, {"label": "enhancement", "count": 5}],
} }
@@ -190,11 +248,23 @@ class TestIssueToAPIMatching:
apis = {"oauth_login": {"name": "oauth_login"}, "async_fetch": {"name": "async_fetch"}} apis = {"oauth_login": {"name": "oauth_login"}, "async_fetch": {"name": "async_fetch"}}
problems = [ problems = [
{"title": "OAuth login fails", "number": 42, "state": "open", "comments": 10, "labels": ["bug", "oauth"]} {
"title": "OAuth login fails",
"number": 42,
"state": "open",
"comments": 10,
"labels": ["bug", "oauth"],
}
] ]
solutions = [ solutions = [
{"title": "Fixed async fetch timeout", "number": 35, "state": "closed", "comments": 5, "labels": ["async"]} {
"title": "Fixed async fetch timeout",
"number": 35,
"state": "closed",
"comments": 5,
"labels": ["async"],
}
] ]
issue_links = _match_issues_to_apis(apis, problems, solutions) issue_links = _match_issues_to_apis(apis, problems, solutions)
@@ -214,7 +284,13 @@ class TestIssueToAPIMatching:
apis = {"database_connect": {"name": "database_connect"}} apis = {"database_connect": {"name": "database_connect"}}
problems = [ problems = [
{"title": "Random unrelated issue", "number": 1, "state": "open", "comments": 5, "labels": ["misc"]} {
"title": "Random unrelated issue",
"number": 1,
"state": "open",
"comments": 5,
"labels": ["misc"],
}
] ]
issue_links = _match_issues_to_apis(apis, problems, []) issue_links = _match_issues_to_apis(apis, problems, [])
@@ -226,7 +302,15 @@ class TestIssueToAPIMatching:
"""Test matching with dotted API names.""" """Test matching with dotted API names."""
apis = {"module.oauth.login": {"name": "module.oauth.login"}} apis = {"module.oauth.login": {"name": "module.oauth.login"}}
problems = [{"title": "OAuth module fails", "number": 42, "state": "open", "comments": 10, "labels": ["oauth"]}] problems = [
{
"title": "OAuth module fails",
"number": 42,
"state": "open",
"comments": 10,
"labels": ["oauth"],
}
]
issue_links = _match_issues_to_apis(apis, problems, []) issue_links = _match_issues_to_apis(apis, problems, [])
@@ -253,8 +337,12 @@ class TestRuleBasedMergerWithGitHubStreams:
) )
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={"stars": 1234, "forks": 56, "language": "Python"}, metadata={"stars": 1234, "forks": 56, "language": "Python"},
common_problems=[{"title": "Bug 1", "number": 1, "state": "open", "comments": 10, "labels": ["bug"]}], common_problems=[
known_solutions=[{"title": "Fix 1", "number": 2, "state": "closed", "comments": 5, "labels": ["bug"]}], {"title": "Bug 1", "number": 1, "state": "open", "comments": 10, "labels": ["bug"]}
],
known_solutions=[
{"title": "Fix 1", "number": 2, "state": "closed", "comments": 5, "labels": ["bug"]}
],
top_labels=[{"label": "bug", "count": 10}], top_labels=[{"label": "bug", "count": 10}],
) )
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
@@ -277,7 +365,9 @@ class TestRuleBasedMergerWithGitHubStreams:
# Create three-stream data # Create three-stream data
code_stream = CodeStream(directory=tmp_path, files=[]) code_stream = CodeStream(directory=tmp_path, files=[])
docs_stream = DocsStream(readme="# README", contributing=None, docs_files=[]) docs_stream = DocsStream(readme="# README", contributing=None, docs_files=[])
insights_stream = InsightsStream(metadata={"stars": 500}, common_problems=[], known_solutions=[], top_labels=[]) insights_stream = InsightsStream(
metadata={"stars": 500}, common_problems=[], known_solutions=[], top_labels=[]
)
github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream) github_streams = ThreeStreamData(code_stream, docs_stream, insights_stream)
# Create and run merger # Create and run merger
@@ -331,7 +421,12 @@ class TestIntegration:
], ],
) )
insights_stream = InsightsStream( insights_stream = InsightsStream(
metadata={"stars": 2500, "forks": 123, "language": "Python", "description": "Test framework"}, metadata={
"stars": 2500,
"forks": 123,
"language": "Python",
"description": "Test framework",
},
common_problems=[ common_problems=[
{ {
"title": "Installation fails on Windows", "title": "Installation fails on Windows",
@@ -349,7 +444,13 @@ class TestIntegration:
}, },
], ],
known_solutions=[ known_solutions=[
{"title": "Fixed config loading", "number": 130, "state": "closed", "comments": 8, "labels": ["bug"]}, {
"title": "Fixed config loading",
"number": 130,
"state": "closed",
"comments": 8,
"labels": ["bug"],
},
{ {
"title": "Resolved OAuth timeout", "title": "Resolved OAuth timeout",
"number": 125, "number": 125,

View File

@@ -114,8 +114,18 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
scraped_data = { scraped_data = {
"documentation": [ "documentation": [
{"source_id": "source_a", "base_url": "https://a.com", "total_pages": 5, "refs_dir": refs_dir1}, {
{"source_id": "source_b", "base_url": "https://b.com", "total_pages": 3, "refs_dir": refs_dir2}, "source_id": "source_a",
"base_url": "https://a.com",
"total_pages": 5,
"refs_dir": refs_dir1,
},
{
"source_id": "source_b",
"base_url": "https://b.com",
"total_pages": 3,
"refs_dir": refs_dir2,
},
], ],
"github": [], "github": [],
"pdf": [], "pdf": [],
@@ -139,7 +149,12 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
scraped_data = { scraped_data = {
"documentation": [ "documentation": [
{"source_id": "my_source", "base_url": "https://example.com", "total_pages": 10, "refs_dir": refs_dir} {
"source_id": "my_source",
"base_url": "https://example.com",
"total_pages": 10,
"refs_dir": refs_dir,
}
], ],
"github": [], "github": [],
"pdf": [], "pdf": [],
@@ -148,7 +163,9 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
builder = UnifiedSkillBuilder(config, scraped_data) builder = UnifiedSkillBuilder(config, scraped_data)
builder._generate_docs_references(scraped_data["documentation"]) builder._generate_docs_references(scraped_data["documentation"])
source_index = os.path.join(builder.skill_dir, "references", "documentation", "my_source", "index.md") source_index = os.path.join(
builder.skill_dir, "references", "documentation", "my_source", "index.md"
)
self.assertTrue(os.path.exists(source_index)) self.assertTrue(os.path.exists(source_index))
with open(source_index) as f: with open(source_index) as f:
@@ -169,8 +186,18 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
scraped_data = { scraped_data = {
"documentation": [ "documentation": [
{"source_id": "docs_one", "base_url": "https://one.com", "total_pages": 10, "refs_dir": refs_dir1}, {
{"source_id": "docs_two", "base_url": "https://two.com", "total_pages": 20, "refs_dir": refs_dir2}, "source_id": "docs_one",
"base_url": "https://one.com",
"total_pages": 10,
"refs_dir": refs_dir1,
},
{
"source_id": "docs_two",
"base_url": "https://two.com",
"total_pages": 20,
"refs_dir": refs_dir2,
},
], ],
"github": [], "github": [],
"pdf": [], "pdf": [],
@@ -205,7 +232,12 @@ class TestUnifiedSkillBuilderDocsReferences(unittest.TestCase):
scraped_data = { scraped_data = {
"documentation": [ "documentation": [
{"source_id": "test_source", "base_url": "https://test.com", "total_pages": 5, "refs_dir": refs_dir} {
"source_id": "test_source",
"base_url": "https://test.com",
"total_pages": 5,
"refs_dir": refs_dir,
}
], ],
"github": [], "github": [],
"pdf": [], "pdf": [],
@@ -290,7 +322,9 @@ class TestUnifiedSkillBuilderGitHubReferences(unittest.TestCase):
builder = UnifiedSkillBuilder(config, scraped_data) builder = UnifiedSkillBuilder(config, scraped_data)
builder._generate_github_references(scraped_data["github"]) builder._generate_github_references(scraped_data["github"])
readme_path = os.path.join(builder.skill_dir, "references", "github", "test_myrepo", "README.md") readme_path = os.path.join(
builder.skill_dir, "references", "github", "test_myrepo", "README.md"
)
self.assertTrue(os.path.exists(readme_path)) self.assertTrue(os.path.exists(readme_path))
with open(readme_path) as f: with open(readme_path) as f:
@@ -338,7 +372,9 @@ class TestUnifiedSkillBuilderGitHubReferences(unittest.TestCase):
builder = UnifiedSkillBuilder(config, scraped_data) builder = UnifiedSkillBuilder(config, scraped_data)
builder._generate_github_references(scraped_data["github"]) builder._generate_github_references(scraped_data["github"])
issues_path = os.path.join(builder.skill_dir, "references", "github", "test_repo", "issues.md") issues_path = os.path.join(
builder.skill_dir, "references", "github", "test_repo", "issues.md"
)
self.assertTrue(os.path.exists(issues_path)) self.assertTrue(os.path.exists(issues_path))
with open(issues_path) as f: with open(issues_path) as f:
@@ -358,12 +394,22 @@ class TestUnifiedSkillBuilderGitHubReferences(unittest.TestCase):
{ {
"repo": "org/first", "repo": "org/first",
"repo_id": "org_first", "repo_id": "org_first",
"data": {"readme": "#", "issues": [], "releases": [], "repo_info": {"stars": 100}}, "data": {
"readme": "#",
"issues": [],
"releases": [],
"repo_info": {"stars": 100},
},
}, },
{ {
"repo": "org/second", "repo": "org/second",
"repo_id": "org_second", "repo_id": "org_second",
"data": {"readme": "#", "issues": [], "releases": [], "repo_info": {"stars": 50}}, "data": {
"readme": "#",
"issues": [],
"releases": [],
"repo_info": {"stars": 50},
},
}, },
], ],
"pdf": [], "pdf": [],
@@ -406,7 +452,11 @@ class TestUnifiedSkillBuilderPdfReferences(unittest.TestCase):
scraped_data = { scraped_data = {
"documentation": [], "documentation": [],
"github": [], "github": [],
"pdf": [{"path": "/path/to/doc1.pdf"}, {"path": "/path/to/doc2.pdf"}, {"path": "/path/to/doc3.pdf"}], "pdf": [
{"path": "/path/to/doc1.pdf"},
{"path": "/path/to/doc2.pdf"},
{"path": "/path/to/doc3.pdf"},
],
} }
builder = UnifiedSkillBuilder(config, scraped_data) builder = UnifiedSkillBuilder(config, scraped_data)

View File

@@ -41,7 +41,9 @@ class TestPackageSkill(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
skill_dir = self.create_test_skill_directory(tmpdir) skill_dir = self.create_test_skill_directory(tmpdir)
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True) success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success) self.assertTrue(success)
self.assertIsNotNone(zip_path) self.assertIsNotNone(zip_path)
@@ -54,7 +56,9 @@ class TestPackageSkill(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
skill_dir = self.create_test_skill_directory(tmpdir) skill_dir = self.create_test_skill_directory(tmpdir)
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True) success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success) self.assertTrue(success)
@@ -77,7 +81,9 @@ class TestPackageSkill(unittest.TestCase):
# Add a backup file # Add a backup file
(skill_dir / "SKILL.md.backup").write_text("# Backup") (skill_dir / "SKILL.md.backup").write_text("# Backup")
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True) success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success) self.assertTrue(success)
@@ -88,7 +94,9 @@ class TestPackageSkill(unittest.TestCase):
def test_package_nonexistent_directory(self): def test_package_nonexistent_directory(self):
"""Test packaging a nonexistent directory""" """Test packaging a nonexistent directory"""
success, zip_path = package_skill("/nonexistent/path", open_folder_after=False, skip_quality_check=True) success, zip_path = package_skill(
"/nonexistent/path", open_folder_after=False, skip_quality_check=True
)
self.assertFalse(success) self.assertFalse(success)
self.assertIsNone(zip_path) self.assertIsNone(zip_path)
@@ -99,7 +107,9 @@ class TestPackageSkill(unittest.TestCase):
skill_dir = Path(tmpdir) / "invalid-skill" skill_dir = Path(tmpdir) / "invalid-skill"
skill_dir.mkdir() skill_dir.mkdir()
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True) success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertFalse(success) self.assertFalse(success)
self.assertIsNone(zip_path) self.assertIsNone(zip_path)
@@ -118,7 +128,9 @@ class TestPackageSkill(unittest.TestCase):
(skill_dir / "scripts").mkdir() (skill_dir / "scripts").mkdir()
(skill_dir / "assets").mkdir() (skill_dir / "assets").mkdir()
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True) success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success) self.assertTrue(success)
# Zip should be in output directory, not inside skill directory # Zip should be in output directory, not inside skill directory
@@ -135,7 +147,9 @@ class TestPackageSkill(unittest.TestCase):
(skill_dir / "scripts").mkdir() (skill_dir / "scripts").mkdir()
(skill_dir / "assets").mkdir() (skill_dir / "assets").mkdir()
success, zip_path = package_skill(skill_dir, open_folder_after=False, skip_quality_check=True) success, zip_path = package_skill(
skill_dir, open_folder_after=False, skip_quality_check=True
)
self.assertTrue(success) self.assertTrue(success)
self.assertEqual(zip_path.name, "my-awesome-skill.zip") self.assertEqual(zip_path.name, "my-awesome-skill.zip")
@@ -149,7 +163,9 @@ class TestPackageSkillCLI(unittest.TestCase):
import subprocess import subprocess
try: try:
result = subprocess.run(["skill-seekers", "package", "--help"], capture_output=True, text=True, timeout=5) result = subprocess.run(
["skill-seekers", "package", "--help"], capture_output=True, text=True, timeout=5
)
# argparse may return 0 or 2 for --help # argparse may return 0 or 2 for --help
self.assertIn(result.returncode, [0, 2]) self.assertIn(result.returncode, [0, 2])
@@ -163,7 +179,9 @@ class TestPackageSkillCLI(unittest.TestCase):
import subprocess import subprocess
try: try:
result = subprocess.run(["skill-seekers-package", "--help"], capture_output=True, text=True, timeout=5) result = subprocess.run(
["skill-seekers-package", "--help"], capture_output=True, text=True, timeout=5
)
# argparse may return 0 or 2 for --help # argparse may return 0 or 2 for --help
self.assertIn(result.returncode, [0, 2]) self.assertIn(result.returncode, [0, 2])

View File

@@ -126,7 +126,9 @@ class TestPackageStructure:
def test_mcp_tools_init_file_exists(self): def test_mcp_tools_init_file_exists(self):
"""Test that src/skill_seekers/mcp/tools/__init__.py exists.""" """Test that src/skill_seekers/mcp/tools/__init__.py exists."""
init_file = Path(__file__).parent.parent / "src" / "skill_seekers" / "mcp" / "tools" / "__init__.py" init_file = (
Path(__file__).parent.parent / "src" / "skill_seekers" / "mcp" / "tools" / "__init__.py"
)
assert init_file.exists(), "src/skill_seekers/mcp/tools/__init__.py not found" assert init_file.exists(), "src/skill_seekers/mcp/tools/__init__.py not found"
def test_cli_init_has_docstring(self): def test_cli_init_has_docstring(self):

View File

@@ -108,7 +108,11 @@ class TestUnlimitedMode(unittest.TestCase):
def test_limited_mode_default(self): def test_limited_mode_default(self):
"""Test default max_pages is limited""" """Test default max_pages is limited"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": {"main_content": "article"}} config = {
"name": "test",
"base_url": "https://example.com/",
"selectors": {"main_content": "article"},
}
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
os.chdir(tmpdir) os.chdir(tmpdir)
@@ -145,7 +149,11 @@ class TestRateLimiting(unittest.TestCase):
def test_rate_limit_default(self): def test_rate_limit_default(self):
"""Test default rate_limit is 0.5""" """Test default rate_limit is 0.5"""
config = {"name": "test", "base_url": "https://example.com/", "selectors": {"main_content": "article"}} config = {
"name": "test",
"base_url": "https://example.com/",
"selectors": {"main_content": "article"},
}
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
os.chdir(tmpdir) os.chdir(tmpdir)

Some files were not shown because too many files have changed in this diff Show More