change max length
This commit is contained in:
@@ -67,7 +67,9 @@ def get_adaptor(platform: str, config: dict = None) -> SkillAdaptor:
|
||||
if platform not in ADAPTORS:
|
||||
available = ", ".join(ADAPTORS.keys())
|
||||
if not ADAPTORS:
|
||||
raise ValueError(f"No adaptors are currently implemented. Platform '{platform}' is not available.")
|
||||
raise ValueError(
|
||||
f"No adaptors are currently implemented. Platform '{platform}' is not available."
|
||||
)
|
||||
raise ValueError(
|
||||
f"Platform '{platform}' is not supported or not yet implemented. Available platforms: {available}"
|
||||
)
|
||||
|
||||
@@ -167,14 +167,28 @@ version: {metadata.version}
|
||||
# Validate ZIP file
|
||||
package_path = Path(package_path)
|
||||
if not package_path.exists():
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"File not found: {package_path}",
|
||||
}
|
||||
|
||||
if not package_path.suffix == ".zip":
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"Not a ZIP file: {package_path}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"Not a ZIP file: {package_path}",
|
||||
}
|
||||
|
||||
# Prepare API request
|
||||
api_url = self.DEFAULT_API_ENDPOINT
|
||||
headers = {"x-api-key": api_key, "anthropic-version": "2023-06-01", "anthropic-beta": "skills-2025-10-02"}
|
||||
headers = {
|
||||
"x-api-key": api_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
"anthropic-beta": "skills-2025-10-02",
|
||||
}
|
||||
|
||||
timeout = kwargs.get("timeout", 60)
|
||||
|
||||
@@ -231,7 +245,12 @@ version: {metadata.version}
|
||||
except:
|
||||
error_msg = f"HTTP {response.status_code}"
|
||||
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {error_msg}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"Upload failed: {error_msg}",
|
||||
}
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
return {
|
||||
@@ -250,7 +269,12 @@ version: {metadata.version}
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"Unexpected error: {str(e)}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"Unexpected error: {str(e)}",
|
||||
}
|
||||
|
||||
def validate_api_key(self, api_key: str) -> bool:
|
||||
"""
|
||||
@@ -363,7 +387,9 @@ version: {metadata.version}
|
||||
print(f"❌ Error calling Claude API: {e}")
|
||||
return False
|
||||
|
||||
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]:
|
||||
def _read_reference_files(
|
||||
self, references_dir: Path, max_chars: int = 200000
|
||||
) -> dict[str, str]:
|
||||
"""
|
||||
Read reference markdown files from skill directory.
|
||||
|
||||
|
||||
@@ -169,10 +169,20 @@ See the references directory for complete documentation with examples and best p
|
||||
# Validate package file FIRST
|
||||
package_path = Path(package_path)
|
||||
if not package_path.exists():
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"File not found: {package_path}",
|
||||
}
|
||||
|
||||
if not package_path.suffix == ".gz":
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"Not a tar.gz file: {package_path}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"Not a tar.gz file: {package_path}",
|
||||
}
|
||||
|
||||
# Check for google-generativeai library
|
||||
try:
|
||||
@@ -210,7 +220,9 @@ See the references directory for complete documentation with examples and best p
|
||||
}
|
||||
|
||||
# Upload to Files API
|
||||
uploaded_file = genai.upload_file(path=str(main_file), display_name=f"{package_path.stem}_instructions")
|
||||
uploaded_file = genai.upload_file(
|
||||
path=str(main_file), display_name=f"{package_path.stem}_instructions"
|
||||
)
|
||||
|
||||
# Upload reference files (if any)
|
||||
refs_dir = temp_path / "references"
|
||||
@@ -230,7 +242,12 @@ See the references directory for complete documentation with examples and best p
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {str(e)}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"Upload failed: {str(e)}",
|
||||
}
|
||||
|
||||
def validate_api_key(self, api_key: str) -> bool:
|
||||
"""
|
||||
@@ -337,7 +354,9 @@ See the references directory for complete documentation with examples and best p
|
||||
print(f"❌ Error calling Gemini API: {e}")
|
||||
return False
|
||||
|
||||
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]:
|
||||
def _read_reference_files(
|
||||
self, references_dir: Path, max_chars: int = 200000
|
||||
) -> dict[str, str]:
|
||||
"""
|
||||
Read reference markdown files from skill directory.
|
||||
|
||||
|
||||
@@ -185,10 +185,20 @@ Always prioritize accuracy by consulting the attached documentation files before
|
||||
# Validate package file FIRST
|
||||
package_path = Path(package_path)
|
||||
if not package_path.exists():
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"File not found: {package_path}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"File not found: {package_path}",
|
||||
}
|
||||
|
||||
if not package_path.suffix == ".zip":
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"Not a ZIP file: {package_path}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"Not a ZIP file: {package_path}",
|
||||
}
|
||||
|
||||
# Check for openai library
|
||||
try:
|
||||
@@ -254,7 +264,9 @@ Always prioritize accuracy by consulting the attached documentation files before
|
||||
|
||||
# Attach files to vector store
|
||||
if file_ids:
|
||||
client.beta.vector_stores.files.create_batch(vector_store_id=vector_store.id, file_ids=file_ids)
|
||||
client.beta.vector_stores.files.create_batch(
|
||||
vector_store_id=vector_store.id, file_ids=file_ids
|
||||
)
|
||||
|
||||
# Create assistant
|
||||
assistant = client.beta.assistants.create(
|
||||
@@ -273,7 +285,12 @@ Always prioritize accuracy by consulting the attached documentation files before
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {"success": False, "skill_id": None, "url": None, "message": f"Upload failed: {str(e)}"}
|
||||
return {
|
||||
"success": False,
|
||||
"skill_id": None,
|
||||
"url": None,
|
||||
"message": f"Upload failed: {str(e)}",
|
||||
}
|
||||
|
||||
def validate_api_key(self, api_key: str) -> bool:
|
||||
"""
|
||||
@@ -389,7 +406,9 @@ Always prioritize accuracy by consulting the attached documentation files before
|
||||
print(f"❌ Error calling OpenAI API: {e}")
|
||||
return False
|
||||
|
||||
def _read_reference_files(self, references_dir: Path, max_chars: int = 200000) -> dict[str, str]:
|
||||
def _read_reference_files(
|
||||
self, references_dir: Path, max_chars: int = 200000
|
||||
) -> dict[str, str]:
|
||||
"""
|
||||
Read reference markdown files from skill directory.
|
||||
|
||||
|
||||
@@ -66,7 +66,9 @@ class AIEnhancer:
|
||||
self.mode = "disabled"
|
||||
self.enabled = False
|
||||
logger.info("ℹ️ AI enhancement disabled (no API key found)")
|
||||
logger.info(" Set ANTHROPIC_API_KEY to enable, or use 'skill-seekers enhance' for SKILL.md")
|
||||
logger.info(
|
||||
" Set ANTHROPIC_API_KEY to enable, or use 'skill-seekers enhance' for SKILL.md"
|
||||
)
|
||||
return
|
||||
|
||||
if self.mode == "api" and self.enabled:
|
||||
@@ -86,7 +88,9 @@ class AIEnhancer:
|
||||
# LOCAL mode requires Claude Code to be available
|
||||
# For patterns/examples, this is less practical than API mode
|
||||
logger.info("ℹ️ LOCAL mode not yet supported for pattern/example enhancement")
|
||||
logger.info(" Use API mode (set ANTHROPIC_API_KEY) or 'skill-seekers enhance' for SKILL.md")
|
||||
logger.info(
|
||||
" Use API mode (set ANTHROPIC_API_KEY) or 'skill-seekers enhance' for SKILL.md"
|
||||
)
|
||||
self.enabled = False
|
||||
|
||||
def _call_claude(self, prompt: str, max_tokens: int = 1000) -> str | None:
|
||||
@@ -96,7 +100,9 @@ class AIEnhancer:
|
||||
|
||||
try:
|
||||
response = self.client.messages.create(
|
||||
model="claude-sonnet-4-20250514", max_tokens=max_tokens, messages=[{"role": "user", "content": prompt}]
|
||||
model="claude-sonnet-4-20250514",
|
||||
max_tokens=max_tokens,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
return response.content[0].text
|
||||
except Exception as e:
|
||||
|
||||
@@ -94,7 +94,9 @@ class APIReferenceBuilder:
|
||||
name_without_ext = basename.rsplit(".", 1)[0] if "." in basename else basename
|
||||
return f"{name_without_ext}.md"
|
||||
|
||||
def _generate_file_reference(self, file_data: dict[str, Any], source_file: str, language: str) -> str:
|
||||
def _generate_file_reference(
|
||||
self, file_data: dict[str, Any], source_file: str, language: str
|
||||
) -> str:
|
||||
"""
|
||||
Generate complete markdown reference for a single file.
|
||||
|
||||
@@ -334,7 +336,9 @@ def main():
|
||||
"""
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Generate API reference from code analysis results")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate API reference from code analysis results"
|
||||
)
|
||||
|
||||
parser.add_argument("input_file", help="Code analysis JSON file")
|
||||
parser.add_argument("output_dir", help="Output directory for markdown files")
|
||||
|
||||
@@ -197,7 +197,9 @@ class ArchitecturalPatternDetector:
|
||||
|
||||
return detected
|
||||
|
||||
def _detect_mvc(self, dirs: dict[str, int], files: list[dict], frameworks: list[str]) -> list[ArchitecturalPattern]:
|
||||
def _detect_mvc(
|
||||
self, dirs: dict[str, int], files: list[dict], frameworks: list[str]
|
||||
) -> list[ArchitecturalPattern]:
|
||||
"""Detect MVC pattern"""
|
||||
patterns = []
|
||||
|
||||
@@ -226,7 +228,9 @@ class ArchitecturalPatternDetector:
|
||||
if len(components["Views"]) == 1:
|
||||
evidence.append("Views directory with view files")
|
||||
|
||||
if "controller" in file_path and ("controllers/" in file_path or "/controller/" in file_path):
|
||||
if "controller" in file_path and (
|
||||
"controllers/" in file_path or "/controller/" in file_path
|
||||
):
|
||||
components["Controllers"].append(file.get("file", ""))
|
||||
if len(components["Controllers"]) == 1:
|
||||
evidence.append("Controllers directory with controller classes")
|
||||
@@ -288,11 +292,15 @@ class ArchitecturalPatternDetector:
|
||||
if "view" in file_path:
|
||||
components["Views"].append(file.get("file", ""))
|
||||
|
||||
if "viewmodel" in file_path or any("viewmodel" in c.get("name", "").lower() for c in classes):
|
||||
if "viewmodel" in file_path or any(
|
||||
"viewmodel" in c.get("name", "").lower() for c in classes
|
||||
):
|
||||
components["ViewModels"].append(file.get("file", ""))
|
||||
|
||||
if len(components["ViewModels"]) >= 2:
|
||||
evidence.append(f"ViewModels directory with {len(components['ViewModels'])} ViewModel classes")
|
||||
evidence.append(
|
||||
f"ViewModels directory with {len(components['ViewModels'])} ViewModel classes"
|
||||
)
|
||||
|
||||
if len(components["Views"]) >= 2:
|
||||
evidence.append(f"Views directory with {len(components['Views'])} view files")
|
||||
@@ -329,7 +337,9 @@ class ArchitecturalPatternDetector:
|
||||
|
||||
return patterns
|
||||
|
||||
def _detect_repository(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]:
|
||||
def _detect_repository(
|
||||
self, dirs: dict[str, int], files: list[dict]
|
||||
) -> list[ArchitecturalPattern]:
|
||||
"""Detect Repository pattern"""
|
||||
patterns = []
|
||||
|
||||
@@ -352,7 +362,9 @@ class ArchitecturalPatternDetector:
|
||||
components["Repositories"].append(file.get("file", ""))
|
||||
|
||||
if len(components["Repositories"]) >= 2:
|
||||
evidence.append(f"Repository pattern: {len(components['Repositories'])} repository classes")
|
||||
evidence.append(
|
||||
f"Repository pattern: {len(components['Repositories'])} repository classes"
|
||||
)
|
||||
evidence.append("Repositories abstract data access logic")
|
||||
|
||||
patterns.append(
|
||||
@@ -367,7 +379,9 @@ class ArchitecturalPatternDetector:
|
||||
|
||||
return patterns
|
||||
|
||||
def _detect_service_layer(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]:
|
||||
def _detect_service_layer(
|
||||
self, dirs: dict[str, int], files: list[dict]
|
||||
) -> list[ArchitecturalPattern]:
|
||||
"""Detect Service Layer pattern"""
|
||||
patterns = []
|
||||
|
||||
@@ -404,7 +418,9 @@ class ArchitecturalPatternDetector:
|
||||
|
||||
return patterns
|
||||
|
||||
def _detect_layered_architecture(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]:
|
||||
def _detect_layered_architecture(
|
||||
self, dirs: dict[str, int], files: list[dict]
|
||||
) -> list[ArchitecturalPattern]:
|
||||
"""Detect Layered Architecture (3-tier, N-tier)"""
|
||||
patterns = []
|
||||
|
||||
@@ -444,7 +460,9 @@ class ArchitecturalPatternDetector:
|
||||
|
||||
return patterns
|
||||
|
||||
def _detect_clean_architecture(self, dirs: dict[str, int], files: list[dict]) -> list[ArchitecturalPattern]:
|
||||
def _detect_clean_architecture(
|
||||
self, dirs: dict[str, int], files: list[dict]
|
||||
) -> list[ArchitecturalPattern]:
|
||||
"""Detect Clean Architecture"""
|
||||
patterns = []
|
||||
|
||||
|
||||
@@ -150,7 +150,9 @@ class CodeAnalyzer:
|
||||
is_method = any(
|
||||
isinstance(parent, ast.ClassDef)
|
||||
for parent in ast.walk(tree)
|
||||
if hasattr(parent, "body") and isinstance(parent.body, list) and node in parent.body
|
||||
if hasattr(parent, "body")
|
||||
and isinstance(parent.body, list)
|
||||
and node in parent.body
|
||||
)
|
||||
except (TypeError, AttributeError):
|
||||
# If body is not iterable or check fails, assume it's a top-level function
|
||||
@@ -173,7 +175,9 @@ class CodeAnalyzer:
|
||||
if isinstance(base, ast.Name):
|
||||
bases.append(base.id)
|
||||
elif isinstance(base, ast.Attribute):
|
||||
bases.append(f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr)
|
||||
bases.append(
|
||||
f"{base.value.id}.{base.attr}" if hasattr(base.value, "id") else base.attr
|
||||
)
|
||||
|
||||
# Extract methods
|
||||
methods = []
|
||||
@@ -186,7 +190,11 @@ class CodeAnalyzer:
|
||||
docstring = ast.get_docstring(node)
|
||||
|
||||
return ClassSignature(
|
||||
name=node.name, base_classes=bases, methods=methods, docstring=docstring, line_number=node.lineno
|
||||
name=node.name,
|
||||
base_classes=bases,
|
||||
methods=methods,
|
||||
docstring=docstring,
|
||||
line_number=node.lineno,
|
||||
)
|
||||
|
||||
def _extract_python_function(self, node, is_method: bool = False) -> FunctionSignature:
|
||||
@@ -209,7 +217,9 @@ class CodeAnalyzer:
|
||||
param_idx = num_no_default + i
|
||||
if param_idx < len(params):
|
||||
try:
|
||||
params[param_idx].default = ast.unparse(default) if hasattr(ast, "unparse") else str(default)
|
||||
params[param_idx].default = (
|
||||
ast.unparse(default) if hasattr(ast, "unparse") else str(default)
|
||||
)
|
||||
except:
|
||||
params[param_idx].default = "..."
|
||||
|
||||
@@ -719,7 +729,9 @@ class CodeAnalyzer:
|
||||
# Distinguish XML doc comments (///)
|
||||
comment_type = "doc" if match.group(1).startswith("/") else "inline"
|
||||
|
||||
comments.append({"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type})
|
||||
comments.append(
|
||||
{"line": line_num, "text": comment_text.lstrip("/").strip(), "type": comment_type}
|
||||
)
|
||||
|
||||
# Multi-line comments (/* */)
|
||||
for match in re.finditer(r"/\*(.+?)\*/", content, re.DOTALL):
|
||||
@@ -1325,9 +1337,7 @@ class CodeAnalyzer:
|
||||
"""Extract PHP method signatures from class body."""
|
||||
methods = []
|
||||
|
||||
method_pattern = (
|
||||
r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
|
||||
)
|
||||
method_pattern = r"(?:public|private|protected)?\s*(?:static|final)?\s*function\s+(\w+)\s*\(([^)]*)\)(?:\s*:\s*(\??\w+))?"
|
||||
for match in re.finditer(method_pattern, class_body):
|
||||
method_name = match.group(1)
|
||||
params_str = match.group(2)
|
||||
@@ -1445,7 +1455,8 @@ def create_sprite(texture: str) -> Node2D:
|
||||
for method in cls["methods"]:
|
||||
params = ", ".join(
|
||||
[
|
||||
f"{p['name']}: {p['type_hint']}" + (f" = {p['default']}" if p.get("default") else "")
|
||||
f"{p['name']}: {p['type_hint']}"
|
||||
+ (f" = {p['default']}" if p.get("default") else "")
|
||||
for p in method["parameters"]
|
||||
]
|
||||
)
|
||||
|
||||
@@ -301,7 +301,11 @@ def analyze_codebase(
|
||||
# Only include files with actual analysis results
|
||||
if analysis and (analysis.get("classes") or analysis.get("functions")):
|
||||
results["files"].append(
|
||||
{"file": str(file_path.relative_to(directory)), "language": language, **analysis}
|
||||
{
|
||||
"file": str(file_path.relative_to(directory)),
|
||||
"language": language,
|
||||
**analysis,
|
||||
}
|
||||
)
|
||||
analyzed_count += 1
|
||||
|
||||
@@ -441,7 +445,10 @@ def analyze_codebase(
|
||||
|
||||
# Create extractor
|
||||
test_extractor = TestExampleExtractor(
|
||||
min_confidence=0.5, max_per_file=10, languages=languages, enhance_with_ai=enhance_with_ai
|
||||
min_confidence=0.5,
|
||||
max_per_file=10,
|
||||
languages=languages,
|
||||
enhance_with_ai=enhance_with_ai,
|
||||
)
|
||||
|
||||
# Extract examples from directory
|
||||
@@ -487,7 +494,11 @@ def analyze_codebase(
|
||||
tutorials_dir = output_dir / "tutorials"
|
||||
|
||||
# Get workflow examples from the example_report if available
|
||||
if "example_report" in locals() and example_report and example_report.total_examples > 0:
|
||||
if (
|
||||
"example_report" in locals()
|
||||
and example_report
|
||||
and example_report.total_examples > 0
|
||||
):
|
||||
# Convert example_report to list of dicts for processing
|
||||
examples_list = example_report.to_dict().get("examples", [])
|
||||
|
||||
@@ -565,7 +576,9 @@ def analyze_codebase(
|
||||
if "ai_enhancements" in result_dict:
|
||||
insights = result_dict["ai_enhancements"].get("overall_insights", {})
|
||||
if insights.get("security_issues_found"):
|
||||
logger.info(f"🔐 Security issues found: {insights['security_issues_found']}")
|
||||
logger.info(
|
||||
f"🔐 Security issues found: {insights['security_issues_found']}"
|
||||
)
|
||||
|
||||
logger.info(f"📁 Saved to: {config_output}")
|
||||
else:
|
||||
@@ -741,10 +754,14 @@ Use this skill when you need to:
|
||||
|
||||
refs_added = False
|
||||
if build_api_reference and (output_dir / "api_reference").exists():
|
||||
skill_content += "- **API Reference**: `references/api_reference/` - Complete API documentation\n"
|
||||
skill_content += (
|
||||
"- **API Reference**: `references/api_reference/` - Complete API documentation\n"
|
||||
)
|
||||
refs_added = True
|
||||
if build_dependency_graph and (output_dir / "dependencies").exists():
|
||||
skill_content += "- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
|
||||
skill_content += (
|
||||
"- **Dependencies**: `references/dependencies/` - Dependency graph and analysis\n"
|
||||
)
|
||||
refs_added = True
|
||||
if detect_patterns and (output_dir / "patterns").exists():
|
||||
skill_content += "- **Patterns**: `references/patterns/` - Detected design patterns\n"
|
||||
@@ -753,7 +770,9 @@ Use this skill when you need to:
|
||||
skill_content += "- **Examples**: `references/test_examples/` - Usage examples from tests\n"
|
||||
refs_added = True
|
||||
if extract_config_patterns and (output_dir / "config_patterns").exists():
|
||||
skill_content += "- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
|
||||
skill_content += (
|
||||
"- **Configuration**: `references/config_patterns/` - Configuration patterns\n"
|
||||
)
|
||||
refs_added = True
|
||||
if (output_dir / "architecture").exists():
|
||||
skill_content += "- **Architecture**: `references/architecture/` - Architectural patterns\n"
|
||||
@@ -1057,12 +1076,21 @@ Examples:
|
||||
)
|
||||
|
||||
parser.add_argument("--directory", required=True, help="Directory to analyze")
|
||||
parser.add_argument("--output", default="output/codebase/", help="Output directory (default: output/codebase/)")
|
||||
parser.add_argument(
|
||||
"--depth", choices=["surface", "deep", "full"], default="deep", help="Analysis depth (default: deep)"
|
||||
"--output", default="output/codebase/", help="Output directory (default: output/codebase/)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--depth",
|
||||
choices=["surface", "deep", "full"],
|
||||
default="deep",
|
||||
help="Analysis depth (default: deep)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)"
|
||||
)
|
||||
parser.add_argument("--languages", help="Comma-separated languages to analyze (e.g., Python,JavaScript,C++)")
|
||||
parser.add_argument("--file-patterns", help="Comma-separated file patterns (e.g., *.py,src/**/*.js)")
|
||||
parser.add_argument(
|
||||
"--skip-api-reference",
|
||||
action="store_true",
|
||||
|
||||
@@ -320,9 +320,11 @@ def api_keys_menu():
|
||||
if key:
|
||||
import os
|
||||
|
||||
env_var = {"anthropic": "ANTHROPIC_API_KEY", "google": "GOOGLE_API_KEY", "openai": "OPENAI_API_KEY"}[
|
||||
provider
|
||||
]
|
||||
env_var = {
|
||||
"anthropic": "ANTHROPIC_API_KEY",
|
||||
"google": "GOOGLE_API_KEY",
|
||||
"openai": "OPENAI_API_KEY",
|
||||
}[provider]
|
||||
if os.getenv(env_var):
|
||||
source = " (from environment)"
|
||||
else:
|
||||
@@ -389,7 +391,9 @@ def rate_limit_settings():
|
||||
print(f" • Show countdown: {current['show_countdown']}\n")
|
||||
|
||||
# Timeout
|
||||
timeout_input = input(f"Default timeout in minutes [{current['default_timeout_minutes']}]: ").strip()
|
||||
timeout_input = input(
|
||||
f"Default timeout in minutes [{current['default_timeout_minutes']}]: "
|
||||
).strip()
|
||||
if timeout_input:
|
||||
try:
|
||||
config.config["rate_limit"]["default_timeout_minutes"] = int(timeout_input)
|
||||
@@ -398,13 +402,17 @@ def rate_limit_settings():
|
||||
|
||||
# Auto-switch
|
||||
auto_switch_input = (
|
||||
input(f"Auto-switch to other profiles? [y/n] ({current['auto_switch_profiles']}): ").strip().lower()
|
||||
input(f"Auto-switch to other profiles? [y/n] ({current['auto_switch_profiles']}): ")
|
||||
.strip()
|
||||
.lower()
|
||||
)
|
||||
if auto_switch_input:
|
||||
config.config["rate_limit"]["auto_switch_profiles"] = auto_switch_input in ["y", "yes"]
|
||||
|
||||
# Show countdown
|
||||
countdown_input = input(f"Show countdown timer? [y/n] ({current['show_countdown']}): ").strip().lower()
|
||||
countdown_input = (
|
||||
input(f"Show countdown timer? [y/n] ({current['show_countdown']}): ").strip().lower()
|
||||
)
|
||||
if countdown_input:
|
||||
config.config["rate_limit"]["show_countdown"] = countdown_input in ["y", "yes"]
|
||||
|
||||
@@ -427,7 +435,9 @@ def resume_settings():
|
||||
print(f" • Keep progress for: {current['keep_progress_days']} days\n")
|
||||
|
||||
# Auto-save interval
|
||||
interval_input = input(f"Auto-save interval in seconds [{current['auto_save_interval_seconds']}]: ").strip()
|
||||
interval_input = input(
|
||||
f"Auto-save interval in seconds [{current['auto_save_interval_seconds']}]: "
|
||||
).strip()
|
||||
if interval_input:
|
||||
try:
|
||||
config.config["resume"]["auto_save_interval_seconds"] = int(interval_input)
|
||||
@@ -435,7 +445,9 @@ def resume_settings():
|
||||
print("⚠️ Invalid input, keeping current value")
|
||||
|
||||
# Keep days
|
||||
days_input = input(f"Keep progress for how many days [{current['keep_progress_days']}]: ").strip()
|
||||
days_input = input(
|
||||
f"Keep progress for how many days [{current['keep_progress_days']}]: "
|
||||
).strip()
|
||||
if days_input:
|
||||
try:
|
||||
config.config["resume"]["keep_progress_days"] = int(days_input)
|
||||
@@ -467,7 +479,9 @@ def test_connections():
|
||||
token = config.config["github"]["profiles"][p["name"]]["token"]
|
||||
try:
|
||||
response = requests.get(
|
||||
"https://api.github.com/rate_limit", headers={"Authorization": f"token {token}"}, timeout=5
|
||||
"https://api.github.com/rate_limit",
|
||||
headers={"Authorization": f"token {token}"},
|
||||
timeout=5,
|
||||
)
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
|
||||
@@ -136,7 +136,9 @@ class ConfigEnhancer:
|
||||
# Call Claude API
|
||||
logger.info("📡 Calling Claude API for config analysis...")
|
||||
response = self.client.messages.create(
|
||||
model="claude-sonnet-4-20250514", max_tokens=8000, messages=[{"role": "user", "content": prompt}]
|
||||
model="claude-sonnet-4-20250514",
|
||||
max_tokens=8000,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
|
||||
# Parse response
|
||||
@@ -157,7 +159,9 @@ class ConfigEnhancer:
|
||||
for cf in config_files[:10]: # Limit to first 10 files
|
||||
settings_summary = []
|
||||
for setting in cf.get("settings", [])[:5]: # First 5 settings per file
|
||||
settings_summary.append(f" - {setting['key']}: {setting['value']} ({setting['value_type']})")
|
||||
settings_summary.append(
|
||||
f" - {setting['key']}: {setting['value']} ({setting['value_type']})"
|
||||
)
|
||||
|
||||
config_summary.append(f"""
|
||||
File: {cf["relative_path"]} ({cf["config_type"]})
|
||||
@@ -221,7 +225,9 @@ Focus on actionable insights that help developers understand and improve their c
|
||||
original_result["ai_enhancements"] = enhancements
|
||||
|
||||
# Add enhancement flags to config files
|
||||
file_enhancements = {e["file_path"]: e for e in enhancements.get("file_enhancements", [])}
|
||||
file_enhancements = {
|
||||
e["file_path"]: e for e in enhancements.get("file_enhancements", [])
|
||||
}
|
||||
for cf in original_result.get("config_files", []):
|
||||
file_path = cf.get("relative_path", cf.get("file_path"))
|
||||
if file_path in file_enhancements:
|
||||
@@ -385,9 +391,14 @@ def main():
|
||||
parser = argparse.ArgumentParser(description="AI-enhance configuration extraction results")
|
||||
parser.add_argument("result_file", help="Path to config extraction JSON result file")
|
||||
parser.add_argument(
|
||||
"--mode", choices=["auto", "api", "local"], default="auto", help="Enhancement mode (default: auto)"
|
||||
"--mode",
|
||||
choices=["auto", "api", "local"],
|
||||
default="auto",
|
||||
help="Enhancement mode (default: auto)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output", help="Output file for enhanced results (default: <input>_enhanced.json)"
|
||||
)
|
||||
parser.add_argument("--output", help="Output file for enhanced results (default: <input>_enhanced.json)")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
@@ -63,7 +63,9 @@ class ConfigFile:
|
||||
|
||||
file_path: str
|
||||
relative_path: str
|
||||
config_type: Literal["json", "yaml", "toml", "env", "ini", "python", "javascript", "dockerfile", "docker-compose"]
|
||||
config_type: Literal[
|
||||
"json", "yaml", "toml", "env", "ini", "python", "javascript", "dockerfile", "docker-compose"
|
||||
]
|
||||
purpose: str # Inferred purpose: database, api, logging, etc.
|
||||
settings: list[ConfigSetting] = field(default_factory=list)
|
||||
patterns: list[str] = field(default_factory=list)
|
||||
@@ -156,11 +158,23 @@ class ConfigFileDetector:
|
||||
CONFIG_PATTERNS = {
|
||||
"json": {
|
||||
"patterns": ["*.json", "package.json", "tsconfig.json", "jsconfig.json"],
|
||||
"names": ["config.json", "settings.json", "app.json", ".eslintrc.json", ".prettierrc.json"],
|
||||
"names": [
|
||||
"config.json",
|
||||
"settings.json",
|
||||
"app.json",
|
||||
".eslintrc.json",
|
||||
".prettierrc.json",
|
||||
],
|
||||
},
|
||||
"yaml": {
|
||||
"patterns": ["*.yaml", "*.yml"],
|
||||
"names": ["config.yml", "settings.yml", ".travis.yml", ".gitlab-ci.yml", "docker-compose.yml"],
|
||||
"names": [
|
||||
"config.yml",
|
||||
"settings.yml",
|
||||
".travis.yml",
|
||||
".gitlab-ci.yml",
|
||||
"docker-compose.yml",
|
||||
],
|
||||
},
|
||||
"toml": {
|
||||
"patterns": ["*.toml"],
|
||||
@@ -498,7 +512,9 @@ class ConfigParser:
|
||||
key = match.group(1)
|
||||
value = match.group(3) if len(match.groups()) > 2 else match.group(2)
|
||||
|
||||
setting = ConfigSetting(key=key, value=value, value_type=self._infer_type(value))
|
||||
setting = ConfigSetting(
|
||||
key=key, value=value, value_type=self._infer_type(value)
|
||||
)
|
||||
config_file.settings.append(setting)
|
||||
|
||||
def _parse_dockerfile(self, config_file: ConfigFile):
|
||||
@@ -514,7 +530,10 @@ class ConfigParser:
|
||||
if len(parts) == 2:
|
||||
key, value = parts
|
||||
setting = ConfigSetting(
|
||||
key=key.strip(), value=value.strip(), value_type="string", env_var=key.strip()
|
||||
key=key.strip(),
|
||||
value=value.strip(),
|
||||
value_type="string",
|
||||
env_var=key.strip(),
|
||||
)
|
||||
config_file.settings.append(setting)
|
||||
|
||||
@@ -527,7 +546,9 @@ class ConfigParser:
|
||||
setting = ConfigSetting(key=key, value=value, value_type="string")
|
||||
config_file.settings.append(setting)
|
||||
|
||||
def _extract_settings_from_dict(self, data: dict, config_file: ConfigFile, parent_path: list[str] = None):
|
||||
def _extract_settings_from_dict(
|
||||
self, data: dict, config_file: ConfigFile, parent_path: list[str] = None
|
||||
):
|
||||
"""Recursively extract settings from dictionary"""
|
||||
if parent_path is None:
|
||||
parent_path = []
|
||||
@@ -636,7 +657,9 @@ class ConfigPatternDetector:
|
||||
|
||||
if matches >= min_match:
|
||||
detected.append(pattern_name)
|
||||
logger.debug(f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)")
|
||||
logger.debug(
|
||||
f"Detected {pattern_name} in {config_file.relative_path} ({matches} matches)"
|
||||
)
|
||||
|
||||
return detected
|
||||
|
||||
@@ -649,7 +672,9 @@ class ConfigExtractor:
|
||||
self.parser = ConfigParser()
|
||||
self.pattern_detector = ConfigPatternDetector()
|
||||
|
||||
def extract_from_directory(self, directory: Path, max_files: int = 100) -> ConfigExtractionResult:
|
||||
def extract_from_directory(
|
||||
self, directory: Path, max_files: int = 100
|
||||
) -> ConfigExtractionResult:
|
||||
"""
|
||||
Extract configuration patterns from directory.
|
||||
|
||||
@@ -695,7 +720,9 @@ class ConfigExtractor:
|
||||
logger.error(error_msg)
|
||||
result.errors.append(error_msg)
|
||||
|
||||
logger.info(f"Extracted {result.total_settings} settings from {result.total_files} config files")
|
||||
logger.info(
|
||||
f"Extracted {result.total_settings} settings from {result.total_files} config files"
|
||||
)
|
||||
logger.info(f"Detected patterns: {list(result.detected_patterns.keys())}")
|
||||
|
||||
return result
|
||||
@@ -741,12 +768,18 @@ def main():
|
||||
)
|
||||
parser.add_argument("directory", type=Path, help="Directory to analyze")
|
||||
parser.add_argument("--output", "-o", type=Path, help="Output JSON file")
|
||||
parser.add_argument("--max-files", type=int, default=100, help="Maximum config files to process")
|
||||
parser.add_argument(
|
||||
"--enhance", action="store_true", help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)"
|
||||
"--max-files", type=int, default=100, help="Maximum config files to process"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enhance-local", action="store_true", help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)"
|
||||
"--enhance",
|
||||
action="store_true",
|
||||
help="Enhance with AI analysis (API mode, requires ANTHROPIC_API_KEY)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enhance-local",
|
||||
action="store_true",
|
||||
help="Enhance with AI analysis (LOCAL mode, uses Claude Code CLI)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ai-mode",
|
||||
|
||||
@@ -27,7 +27,11 @@ class ConfigManager:
|
||||
DEFAULT_CONFIG = {
|
||||
"version": "1.0",
|
||||
"github": {"default_profile": None, "profiles": {}},
|
||||
"rate_limit": {"default_timeout_minutes": 30, "auto_switch_profiles": True, "show_countdown": True},
|
||||
"rate_limit": {
|
||||
"default_timeout_minutes": 30,
|
||||
"auto_switch_profiles": True,
|
||||
"show_countdown": True,
|
||||
},
|
||||
"resume": {"auto_save_interval_seconds": 60, "keep_progress_days": 7},
|
||||
"api_keys": {"anthropic": None, "google": None, "openai": None},
|
||||
"first_run": {"completed": False, "version": "2.7.0"},
|
||||
@@ -161,7 +165,9 @@ class ConfigManager:
|
||||
|
||||
return profiles
|
||||
|
||||
def get_github_token(self, profile_name: str | None = None, repo_url: str | None = None) -> str | None:
|
||||
def get_github_token(
|
||||
self, profile_name: str | None = None, repo_url: str | None = None
|
||||
) -> str | None:
|
||||
"""
|
||||
Get GitHub token with smart fallback chain.
|
||||
|
||||
@@ -269,7 +275,11 @@ class ConfigManager:
|
||||
2. Config file
|
||||
"""
|
||||
# Check environment first
|
||||
env_map = {"anthropic": "ANTHROPIC_API_KEY", "google": "GOOGLE_API_KEY", "openai": "OPENAI_API_KEY"}
|
||||
env_map = {
|
||||
"anthropic": "ANTHROPIC_API_KEY",
|
||||
"google": "GOOGLE_API_KEY",
|
||||
"openai": "OPENAI_API_KEY",
|
||||
}
|
||||
|
||||
env_var = env_map.get(provider)
|
||||
if env_var:
|
||||
|
||||
@@ -112,7 +112,9 @@ class ConfigValidator:
|
||||
# Validate merge_mode (optional)
|
||||
merge_mode = self.config.get("merge_mode", "rule-based")
|
||||
if merge_mode not in self.VALID_MERGE_MODES:
|
||||
raise ValueError(f"Invalid merge_mode: '{merge_mode}'. Must be one of {self.VALID_MERGE_MODES}")
|
||||
raise ValueError(
|
||||
f"Invalid merge_mode: '{merge_mode}'. Must be one of {self.VALID_MERGE_MODES}"
|
||||
)
|
||||
|
||||
# Validate each source
|
||||
for i, source in enumerate(sources):
|
||||
@@ -130,7 +132,9 @@ class ConfigValidator:
|
||||
source_type = source["type"]
|
||||
|
||||
if source_type not in self.VALID_SOURCE_TYPES:
|
||||
raise ValueError(f"Source {index}: Invalid type '{source_type}'. Must be one of {self.VALID_SOURCE_TYPES}")
|
||||
raise ValueError(
|
||||
f"Source {index}: Invalid type '{source_type}'. Must be one of {self.VALID_SOURCE_TYPES}"
|
||||
)
|
||||
|
||||
# Type-specific validation
|
||||
if source_type == "documentation":
|
||||
@@ -147,7 +151,9 @@ class ConfigValidator:
|
||||
|
||||
# Optional but recommended fields
|
||||
if "selectors" not in source:
|
||||
logger.warning(f"Source {index} (documentation): No 'selectors' specified, using defaults")
|
||||
logger.warning(
|
||||
f"Source {index} (documentation): No 'selectors' specified, using defaults"
|
||||
)
|
||||
|
||||
if "max_pages" in source and not isinstance(source["max_pages"], int):
|
||||
raise ValueError(f"Source {index} (documentation): 'max_pages' must be an integer")
|
||||
@@ -178,8 +184,12 @@ class ConfigValidator:
|
||||
raise ValueError(f"Source {index} (github): 'max_issues' must be an integer")
|
||||
|
||||
# Validate enable_codebase_analysis if specified (C3.5)
|
||||
if "enable_codebase_analysis" in source and not isinstance(source["enable_codebase_analysis"], bool):
|
||||
raise ValueError(f"Source {index} (github): 'enable_codebase_analysis' must be a boolean")
|
||||
if "enable_codebase_analysis" in source and not isinstance(
|
||||
source["enable_codebase_analysis"], bool
|
||||
):
|
||||
raise ValueError(
|
||||
f"Source {index} (github): 'enable_codebase_analysis' must be a boolean"
|
||||
)
|
||||
|
||||
# Validate ai_mode if specified (C3.5)
|
||||
if "ai_mode" in source:
|
||||
@@ -249,7 +259,10 @@ class ConfigValidator:
|
||||
"description": self.config.get("description", "Documentation skill"),
|
||||
"merge_mode": "rule-based",
|
||||
"sources": [
|
||||
{"type": "documentation", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}}
|
||||
{
|
||||
"type": "documentation",
|
||||
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
|
||||
}
|
||||
],
|
||||
}
|
||||
return unified
|
||||
@@ -261,7 +274,10 @@ class ConfigValidator:
|
||||
"description": self.config.get("description", "GitHub repository skill"),
|
||||
"merge_mode": "rule-based",
|
||||
"sources": [
|
||||
{"type": "github", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}}
|
||||
{
|
||||
"type": "github",
|
||||
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
|
||||
}
|
||||
],
|
||||
}
|
||||
return unified
|
||||
@@ -272,7 +288,12 @@ class ConfigValidator:
|
||||
"name": self.config.get("name", "unnamed"),
|
||||
"description": self.config.get("description", "PDF document skill"),
|
||||
"merge_mode": "rule-based",
|
||||
"sources": [{"type": "pdf", **{k: v for k, v in self.config.items() if k not in ["name", "description"]}}],
|
||||
"sources": [
|
||||
{
|
||||
"type": "pdf",
|
||||
**{k: v for k, v in self.config.items() if k not in ["name", "description"]},
|
||||
}
|
||||
],
|
||||
}
|
||||
return unified
|
||||
|
||||
@@ -312,11 +333,13 @@ class ConfigValidator:
|
||||
return False
|
||||
|
||||
has_docs_api = any(
|
||||
s.get("type") == "documentation" and s.get("extract_api", True) for s in self.config["sources"]
|
||||
s.get("type") == "documentation" and s.get("extract_api", True)
|
||||
for s in self.config["sources"]
|
||||
)
|
||||
|
||||
has_github_code = any(
|
||||
s.get("type") == "github" and s.get("include_code", False) for s in self.config["sources"]
|
||||
s.get("type") == "github" and s.get("include_code", False)
|
||||
for s in self.config["sources"]
|
||||
)
|
||||
|
||||
return has_docs_api and has_github_code
|
||||
|
||||
@@ -451,7 +451,12 @@ class ConflictDetector:
|
||||
}
|
||||
|
||||
# Count by type
|
||||
for conflict_type in ["missing_in_docs", "missing_in_code", "signature_mismatch", "description_mismatch"]:
|
||||
for conflict_type in [
|
||||
"missing_in_docs",
|
||||
"missing_in_code",
|
||||
"signature_mismatch",
|
||||
"description_mismatch",
|
||||
]:
|
||||
count = sum(1 for c in conflicts if c.type == conflict_type)
|
||||
summary["by_type"][conflict_type] = count
|
||||
|
||||
@@ -470,7 +475,10 @@ class ConflictDetector:
|
||||
conflicts: List of Conflict objects
|
||||
output_path: Path to output JSON file
|
||||
"""
|
||||
data = {"conflicts": [asdict(c) for c in conflicts], "summary": self.generate_summary(conflicts)}
|
||||
data = {
|
||||
"conflicts": [asdict(c) for c in conflicts],
|
||||
"summary": self.generate_summary(conflicts),
|
||||
}
|
||||
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
||||
|
||||
@@ -86,7 +86,9 @@ class DependencyAnalyzer:
|
||||
def __init__(self):
|
||||
"""Initialize dependency analyzer."""
|
||||
if not NETWORKX_AVAILABLE:
|
||||
raise ImportError("NetworkX is required for dependency analysis. Install with: pip install networkx")
|
||||
raise ImportError(
|
||||
"NetworkX is required for dependency analysis. Install with: pip install networkx"
|
||||
)
|
||||
|
||||
self.graph = nx.DiGraph() # Directed graph for dependencies
|
||||
self.file_dependencies: dict[str, list[DependencyInfo]] = {}
|
||||
@@ -130,7 +132,9 @@ class DependencyAnalyzer:
|
||||
|
||||
# Create file node
|
||||
imported_modules = [dep.imported_module for dep in deps]
|
||||
self.file_nodes[file_path] = FileNode(file_path=file_path, language=language, dependencies=imported_modules)
|
||||
self.file_nodes[file_path] = FileNode(
|
||||
file_path=file_path, language=language, dependencies=imported_modules
|
||||
)
|
||||
|
||||
return deps
|
||||
|
||||
@@ -594,7 +598,9 @@ class DependencyAnalyzer:
|
||||
|
||||
if target and target in self.file_nodes:
|
||||
# Add edge from source to dependency
|
||||
self.graph.add_edge(file_path, target, import_type=dep.import_type, line_number=dep.line_number)
|
||||
self.graph.add_edge(
|
||||
file_path, target, import_type=dep.import_type, line_number=dep.line_number
|
||||
)
|
||||
|
||||
# Update imported_by lists
|
||||
if target in self.file_nodes:
|
||||
@@ -602,7 +608,9 @@ class DependencyAnalyzer:
|
||||
|
||||
return self.graph
|
||||
|
||||
def _resolve_import(self, source_file: str, imported_module: str, is_relative: bool) -> str | None:
|
||||
def _resolve_import(
|
||||
self, source_file: str, imported_module: str, is_relative: bool
|
||||
) -> str | None:
|
||||
"""
|
||||
Resolve import statement to actual file path.
|
||||
|
||||
@@ -736,10 +744,14 @@ class DependencyAnalyzer:
|
||||
"circular_dependencies": len(self.detect_cycles()),
|
||||
"strongly_connected_components": len(self.get_strongly_connected_components()),
|
||||
"avg_dependencies_per_file": (
|
||||
self.graph.number_of_edges() / self.graph.number_of_nodes() if self.graph.number_of_nodes() > 0 else 0
|
||||
self.graph.number_of_edges() / self.graph.number_of_nodes()
|
||||
if self.graph.number_of_nodes() > 0
|
||||
else 0
|
||||
),
|
||||
"files_with_no_dependencies": len(
|
||||
[node for node in self.graph.nodes() if self.graph.out_degree(node) == 0]
|
||||
),
|
||||
"files_not_imported": len([node for node in self.graph.nodes() if self.graph.in_degree(node) == 0]),
|
||||
"files_not_imported": len(
|
||||
[node for node in self.graph.nodes() if self.graph.in_degree(node) == 0]
|
||||
),
|
||||
}
|
||||
|
||||
@@ -65,7 +65,9 @@ def setup_logging(verbose: bool = False, quiet: bool = False) -> None:
|
||||
logging.basicConfig(level=level, format="%(message)s", force=True)
|
||||
|
||||
|
||||
def infer_description_from_docs(base_url: str, first_page_content: str | None = None, name: str = "") -> str:
|
||||
def infer_description_from_docs(
|
||||
base_url: str, first_page_content: str | None = None, name: str = ""
|
||||
) -> str:
|
||||
"""
|
||||
Infer skill description from documentation metadata or first page content.
|
||||
|
||||
@@ -109,7 +111,13 @@ def infer_description_from_docs(base_url: str, first_page_content: str | None =
|
||||
# Strategy 3: Extract first meaningful paragraph from main content
|
||||
# Look for common documentation main content areas
|
||||
main_content = None
|
||||
for selector in ["article", "main", 'div[role="main"]', "div.content", "div.doc-content"]:
|
||||
for selector in [
|
||||
"article",
|
||||
"main",
|
||||
'div[role="main"]',
|
||||
"div.content",
|
||||
"div.doc-content",
|
||||
]:
|
||||
main_content = soup.select_one(selector)
|
||||
if main_content:
|
||||
break
|
||||
@@ -120,7 +128,8 @@ def infer_description_from_docs(base_url: str, first_page_content: str | None =
|
||||
text = p.get_text().strip()
|
||||
# Skip empty, very short, or navigation-like paragraphs
|
||||
if len(text) > 30 and not any(
|
||||
skip in text.lower() for skip in ["table of contents", "on this page", "navigation"]
|
||||
skip in text.lower()
|
||||
for skip in ["table of contents", "on this page", "navigation"]
|
||||
):
|
||||
# Clean and format
|
||||
if len(text) > 150:
|
||||
@@ -160,7 +169,8 @@ class DocToSkillConverter:
|
||||
skip_llms_txt_value = config.get("skip_llms_txt", False)
|
||||
if not isinstance(skip_llms_txt_value, bool):
|
||||
logger.warning(
|
||||
"Invalid value for 'skip_llms_txt': %r (expected bool). Defaulting to False.", skip_llms_txt_value
|
||||
"Invalid value for 'skip_llms_txt': %r (expected bool). Defaulting to False.",
|
||||
skip_llms_txt_value,
|
||||
)
|
||||
self.skip_llms_txt = False
|
||||
else:
|
||||
@@ -381,7 +391,15 @@ class DocToSkillConverter:
|
||||
if content.strip().startswith("<!DOCTYPE") or content.strip().startswith("<html"):
|
||||
return self._extract_html_as_markdown(content, url)
|
||||
|
||||
page = {"url": url, "title": "", "content": "", "headings": [], "code_samples": [], "patterns": [], "links": []}
|
||||
page = {
|
||||
"url": url,
|
||||
"title": "",
|
||||
"content": "",
|
||||
"headings": [],
|
||||
"code_samples": [],
|
||||
"patterns": [],
|
||||
"links": [],
|
||||
}
|
||||
|
||||
lines = content.split("\n")
|
||||
|
||||
@@ -397,7 +415,9 @@ class DocToSkillConverter:
|
||||
if match:
|
||||
level = len(match.group(1))
|
||||
text = match.group(2).strip()
|
||||
page["headings"].append({"level": f"h{level}", "text": text, "id": text.lower().replace(" ", "-")})
|
||||
page["headings"].append(
|
||||
{"level": f"h{level}", "text": text, "id": text.lower().replace(" ", "-")}
|
||||
)
|
||||
|
||||
# Extract code blocks with language
|
||||
code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
|
||||
@@ -464,7 +484,15 @@ class DocToSkillConverter:
|
||||
Falls back to <body> if no semantic content container found.
|
||||
Language detection uses detect_language() method.
|
||||
"""
|
||||
page = {"url": url, "title": "", "content": "", "headings": [], "code_samples": [], "patterns": [], "links": []}
|
||||
page = {
|
||||
"url": url,
|
||||
"title": "",
|
||||
"content": "",
|
||||
"headings": [],
|
||||
"code_samples": [],
|
||||
"patterns": [],
|
||||
"links": [],
|
||||
}
|
||||
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
|
||||
@@ -515,7 +543,9 @@ class DocToSkillConverter:
|
||||
|
||||
return lang # Return string for backward compatibility
|
||||
|
||||
def extract_patterns(self, main: Any, code_samples: list[dict[str, Any]]) -> list[dict[str, str]]:
|
||||
def extract_patterns(
|
||||
self, main: Any, code_samples: list[dict[str, Any]]
|
||||
) -> list[dict[str, str]]:
|
||||
"""Extract common coding patterns (NEW FEATURE)"""
|
||||
patterns = []
|
||||
|
||||
@@ -527,7 +557,10 @@ class DocToSkillConverter:
|
||||
next_code = elem.find_next(["pre", "code"])
|
||||
if next_code:
|
||||
patterns.append(
|
||||
{"description": self.clean_text(elem.get_text()), "code": next_code.get_text().strip()}
|
||||
{
|
||||
"description": self.clean_text(elem.get_text()),
|
||||
"code": next_code.get_text().strip(),
|
||||
}
|
||||
)
|
||||
|
||||
return patterns[:5] # Limit to 5 most relevant patterns
|
||||
@@ -615,7 +648,9 @@ class DocToSkillConverter:
|
||||
logger.error(" ✗ Error scraping page: %s: %s", type(e).__name__, e)
|
||||
logger.error(" URL: %s", url)
|
||||
|
||||
async def scrape_page_async(self, url: str, semaphore: asyncio.Semaphore, client: httpx.AsyncClient) -> None:
|
||||
async def scrape_page_async(
|
||||
self, url: str, semaphore: asyncio.Semaphore, client: httpx.AsyncClient
|
||||
) -> None:
|
||||
"""Scrape a single page asynchronously.
|
||||
|
||||
Args:
|
||||
@@ -682,7 +717,9 @@ class DocToSkillConverter:
|
||||
md_url = f"{url}/index.html.md"
|
||||
md_urls.append(md_url)
|
||||
|
||||
logger.info(" ✓ Converted %d URLs to .md format (will validate during crawl)", len(md_urls))
|
||||
logger.info(
|
||||
" ✓ Converted %d URLs to .md format (will validate during crawl)", len(md_urls)
|
||||
)
|
||||
return md_urls
|
||||
|
||||
# ORIGINAL _convert_to_md_urls (with HEAD request validation):
|
||||
@@ -744,7 +781,9 @@ class DocToSkillConverter:
|
||||
variants = detector.detect_all()
|
||||
|
||||
if variants:
|
||||
logger.info("\n🔍 Found %d total variant(s), downloading remaining...", len(variants))
|
||||
logger.info(
|
||||
"\n🔍 Found %d total variant(s), downloading remaining...", len(variants)
|
||||
)
|
||||
for variant_info in variants:
|
||||
url = variant_info["url"]
|
||||
variant = variant_info["variant"]
|
||||
@@ -759,7 +798,9 @@ class DocToSkillConverter:
|
||||
|
||||
if extra_content:
|
||||
extra_filename = extra_downloader.get_proper_filename()
|
||||
extra_filepath = os.path.join(self.skill_dir, "references", extra_filename)
|
||||
extra_filepath = os.path.join(
|
||||
self.skill_dir, "references", extra_filename
|
||||
)
|
||||
with open(extra_filepath, "w", encoding="utf-8") as f:
|
||||
f.write(extra_content)
|
||||
logger.info(" ✓ %s (%d chars)", extra_filename, len(extra_content))
|
||||
@@ -783,7 +824,9 @@ class DocToSkillConverter:
|
||||
if self.is_valid_url(url) and url not in self.visited_urls:
|
||||
self.pending_urls.append(url)
|
||||
|
||||
logger.info(" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls))
|
||||
logger.info(
|
||||
" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls)
|
||||
)
|
||||
|
||||
# Return False to trigger HTML scraping with the populated pending_urls
|
||||
self.llms_txt_detected = True
|
||||
@@ -824,7 +867,11 @@ class DocToSkillConverter:
|
||||
|
||||
if content:
|
||||
filename = downloader.get_proper_filename()
|
||||
downloaded[variant] = {"content": content, "filename": filename, "size": len(content)}
|
||||
downloaded[variant] = {
|
||||
"content": content,
|
||||
"filename": filename,
|
||||
"size": len(content),
|
||||
}
|
||||
logger.info(" ✓ %s (%d chars)", filename, len(content))
|
||||
|
||||
if not downloaded:
|
||||
@@ -902,7 +949,9 @@ class DocToSkillConverter:
|
||||
if not self.dry_run and not self.skip_llms_txt:
|
||||
llms_result = self._try_llms_txt()
|
||||
if llms_result:
|
||||
logger.info("\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant)
|
||||
logger.info(
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
|
||||
)
|
||||
self.save_summary()
|
||||
return
|
||||
|
||||
@@ -953,7 +1002,9 @@ class DocToSkillConverter:
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
|
||||
main_selector = self.config.get("selectors", {}).get("main_content", 'div[role="main"]')
|
||||
main_selector = self.config.get("selectors", {}).get(
|
||||
"main_content", 'div[role="main"]'
|
||||
)
|
||||
main = soup.select_one(main_selector)
|
||||
|
||||
if main:
|
||||
@@ -968,7 +1019,10 @@ class DocToSkillConverter:
|
||||
self.scrape_page(url)
|
||||
self.pages_scraped += 1
|
||||
|
||||
if self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0:
|
||||
if (
|
||||
self.checkpoint_enabled
|
||||
and self.pages_scraped % self.checkpoint_interval == 0
|
||||
):
|
||||
self.save_checkpoint()
|
||||
|
||||
if len(self.visited_urls) % 10 == 0:
|
||||
@@ -1019,7 +1073,10 @@ class DocToSkillConverter:
|
||||
with self.lock:
|
||||
self.pages_scraped += 1
|
||||
|
||||
if self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0:
|
||||
if (
|
||||
self.checkpoint_enabled
|
||||
and self.pages_scraped % self.checkpoint_interval == 0
|
||||
):
|
||||
self.save_checkpoint()
|
||||
|
||||
if self.pages_scraped % 10 == 0:
|
||||
@@ -1062,7 +1119,9 @@ class DocToSkillConverter:
|
||||
if not self.dry_run and not self.skip_llms_txt:
|
||||
llms_result = self._try_llms_txt()
|
||||
if llms_result:
|
||||
logger.info("\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant)
|
||||
logger.info(
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
|
||||
)
|
||||
self.save_summary()
|
||||
return
|
||||
|
||||
@@ -1097,7 +1156,9 @@ class DocToSkillConverter:
|
||||
semaphore = asyncio.Semaphore(self.workers)
|
||||
|
||||
# Create shared HTTP client with connection pooling
|
||||
async with httpx.AsyncClient(timeout=30.0, limits=httpx.Limits(max_connections=self.workers * 2)) as client:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0, limits=httpx.Limits(max_connections=self.workers * 2)
|
||||
) as client:
|
||||
tasks = []
|
||||
|
||||
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
|
||||
@@ -1120,7 +1181,9 @@ class DocToSkillConverter:
|
||||
if self.dry_run:
|
||||
logger.info(" [Preview] %s", url)
|
||||
else:
|
||||
task = asyncio.create_task(self.scrape_page_async(url, semaphore, client))
|
||||
task = asyncio.create_task(
|
||||
self.scrape_page_async(url, semaphore, client)
|
||||
)
|
||||
tasks.append(task)
|
||||
|
||||
# Wait for batch to complete before continuing
|
||||
@@ -1145,7 +1208,9 @@ class DocToSkillConverter:
|
||||
if self.dry_run:
|
||||
logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
|
||||
if len(self.visited_urls) >= preview_limit:
|
||||
logger.info(" (showing first %d, actual scraping may find more)", int(preview_limit))
|
||||
logger.info(
|
||||
" (showing first %d, actual scraping may find more)", int(preview_limit)
|
||||
)
|
||||
logger.info("\n💡 To actually scrape, run without --dry-run")
|
||||
else:
|
||||
logger.info("\n✅ Scraped %d pages (async mode)", len(self.visited_urls))
|
||||
@@ -1178,8 +1243,12 @@ class DocToSkillConverter:
|
||||
with open(json_file, encoding="utf-8") as f:
|
||||
pages.append(json.load(f))
|
||||
except Exception as e:
|
||||
logger.error("⚠️ Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e)
|
||||
logger.error(" Suggestion: File may be corrupted, consider re-scraping with --fresh")
|
||||
logger.error(
|
||||
"⚠️ Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e
|
||||
)
|
||||
logger.error(
|
||||
" Suggestion: File may be corrupted, consider re-scraping with --fresh"
|
||||
)
|
||||
|
||||
return pages
|
||||
|
||||
@@ -1197,7 +1266,9 @@ class DocToSkillConverter:
|
||||
for page in pages:
|
||||
url = page["url"].lower()
|
||||
title = page["title"].lower()
|
||||
content = page.get("content", "").lower()[:CONTENT_PREVIEW_LENGTH] # Check first N chars for categorization
|
||||
content = page.get("content", "").lower()[
|
||||
:CONTENT_PREVIEW_LENGTH
|
||||
] # Check first N chars for categorization
|
||||
|
||||
categorized = False
|
||||
|
||||
@@ -1232,7 +1303,9 @@ class DocToSkillConverter:
|
||||
|
||||
for page in pages:
|
||||
path = urlparse(page["url"]).path
|
||||
segments = [s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]]
|
||||
segments = [
|
||||
s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]
|
||||
]
|
||||
|
||||
for seg in segments:
|
||||
url_segments[seg] += 1
|
||||
@@ -1246,10 +1319,14 @@ class DocToSkillConverter:
|
||||
categories[seg] = [seg]
|
||||
|
||||
# Add common defaults
|
||||
if "tutorial" not in categories and any("tutorial" in url for url in [p["url"] for p in pages]):
|
||||
if "tutorial" not in categories and any(
|
||||
"tutorial" in url for url in [p["url"] for p in pages]
|
||||
):
|
||||
categories["tutorials"] = ["tutorial", "guide", "getting-started"]
|
||||
|
||||
if "api" not in categories and any("api" in url or "reference" in url for url in [p["url"] for p in pages]):
|
||||
if "api" not in categories and any(
|
||||
"api" in url or "reference" in url for url in [p["url"] for p in pages]
|
||||
):
|
||||
categories["api"] = ["api", "reference", "class"]
|
||||
|
||||
return categories
|
||||
@@ -1551,12 +1628,16 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
|
||||
# Validate name (alphanumeric, hyphens, underscores only)
|
||||
if "name" in config:
|
||||
if not re.match(r"^[a-zA-Z0-9_-]+$", config["name"]):
|
||||
errors.append(f"Invalid name: '{config['name']}' (use only letters, numbers, hyphens, underscores)")
|
||||
errors.append(
|
||||
f"Invalid name: '{config['name']}' (use only letters, numbers, hyphens, underscores)"
|
||||
)
|
||||
|
||||
# Validate base_url
|
||||
if "base_url" in config:
|
||||
if not config["base_url"].startswith(("http://", "https://")):
|
||||
errors.append(f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)")
|
||||
errors.append(
|
||||
f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)"
|
||||
)
|
||||
|
||||
# Validate selectors structure
|
||||
if "selectors" in config:
|
||||
@@ -1596,7 +1677,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
|
||||
if rate < 0:
|
||||
errors.append(f"'rate_limit' must be non-negative (got {rate})")
|
||||
elif rate > 10:
|
||||
warnings.append(f"'rate_limit' is very high ({rate}s) - this may slow down scraping significantly")
|
||||
warnings.append(
|
||||
f"'rate_limit' is very high ({rate}s) - this may slow down scraping significantly"
|
||||
)
|
||||
except (ValueError, TypeError):
|
||||
errors.append(f"'rate_limit' must be a number (got {config['rate_limit']})")
|
||||
|
||||
@@ -1606,19 +1689,29 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
|
||||
|
||||
# Allow None for unlimited
|
||||
if max_p_value is None:
|
||||
warnings.append("'max_pages' is None (unlimited) - this will scrape ALL pages. Use with caution!")
|
||||
warnings.append(
|
||||
"'max_pages' is None (unlimited) - this will scrape ALL pages. Use with caution!"
|
||||
)
|
||||
else:
|
||||
try:
|
||||
max_p = int(max_p_value)
|
||||
# Allow -1 for unlimited
|
||||
if max_p == -1:
|
||||
warnings.append("'max_pages' is -1 (unlimited) - this will scrape ALL pages. Use with caution!")
|
||||
warnings.append(
|
||||
"'max_pages' is -1 (unlimited) - this will scrape ALL pages. Use with caution!"
|
||||
)
|
||||
elif max_p < 1:
|
||||
errors.append(f"'max_pages' must be at least 1 or -1 for unlimited (got {max_p})")
|
||||
errors.append(
|
||||
f"'max_pages' must be at least 1 or -1 for unlimited (got {max_p})"
|
||||
)
|
||||
elif max_p > MAX_PAGES_WARNING_THRESHOLD:
|
||||
warnings.append(f"'max_pages' is very high ({max_p}) - scraping may take a very long time")
|
||||
warnings.append(
|
||||
f"'max_pages' is very high ({max_p}) - scraping may take a very long time"
|
||||
)
|
||||
except (ValueError, TypeError):
|
||||
errors.append(f"'max_pages' must be an integer, -1, or null (got {config['max_pages']})")
|
||||
errors.append(
|
||||
f"'max_pages' must be an integer, -1, or null (got {config['max_pages']})"
|
||||
)
|
||||
|
||||
# Validate start_urls if present
|
||||
if "start_urls" in config:
|
||||
@@ -1627,7 +1720,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
|
||||
else:
|
||||
for url in config["start_urls"]:
|
||||
if not url.startswith(("http://", "https://")):
|
||||
errors.append(f"Invalid start_url: '{url}' (must start with http:// or https://)")
|
||||
errors.append(
|
||||
f"Invalid start_url: '{url}' (must start with http:// or https://)"
|
||||
)
|
||||
|
||||
return errors, warnings
|
||||
|
||||
@@ -1716,7 +1811,9 @@ def interactive_config() -> dict[str, Any]:
|
||||
# Selectors
|
||||
logger.info("\nCSS Selectors (press Enter for defaults):")
|
||||
selectors = {}
|
||||
selectors["main_content"] = input(" Main content [div[role='main']]: ").strip() or "div[role='main']"
|
||||
selectors["main_content"] = (
|
||||
input(" Main content [div[role='main']]: ").strip() or "div[role='main']"
|
||||
)
|
||||
selectors["title"] = input(" Title [title]: ").strip() or "title"
|
||||
selectors["code_blocks"] = input(" Code blocks [pre code]: ").strip() or "pre code"
|
||||
config["selectors"] = selectors
|
||||
@@ -1782,15 +1879,27 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
)
|
||||
|
||||
parser.add_argument("--interactive", "-i", action="store_true", help="Interactive configuration mode")
|
||||
parser.add_argument("--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)")
|
||||
parser.add_argument(
|
||||
"--interactive", "-i", action="store_true", help="Interactive configuration mode"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)"
|
||||
)
|
||||
parser.add_argument("--name", type=str, help="Skill name")
|
||||
parser.add_argument("--url", type=str, help="Base documentation URL")
|
||||
parser.add_argument("--description", "-d", type=str, help="Skill description")
|
||||
parser.add_argument("--skip-scrape", action="store_true", help="Skip scraping, use existing data")
|
||||
parser.add_argument("--dry-run", action="store_true", help="Preview what will be scraped without actually scraping")
|
||||
parser.add_argument(
|
||||
"--enhance", action="store_true", help="Enhance SKILL.md using Claude API after building (requires API key)"
|
||||
"--skip-scrape", action="store_true", help="Skip scraping, use existing data"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="Preview what will be scraped without actually scraping",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enhance",
|
||||
action="store_true",
|
||||
help="Enhance SKILL.md using Claude API after building (requires API key)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enhance-local",
|
||||
@@ -1802,8 +1911,14 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
action="store_true",
|
||||
help="Open terminal window for enhancement (use with --enhance-local)",
|
||||
)
|
||||
parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)")
|
||||
parser.add_argument("--resume", action="store_true", help="Resume from last checkpoint (for interrupted scrapes)")
|
||||
parser.add_argument(
|
||||
"--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
action="store_true",
|
||||
help="Resume from last checkpoint (for interrupted scrapes)",
|
||||
)
|
||||
parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh")
|
||||
parser.add_argument(
|
||||
"--rate-limit",
|
||||
@@ -1826,10 +1941,16 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
help="Enable async mode for better parallel performance (2-3x faster than threads)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-rate-limit", action="store_true", help="Disable rate limiting completely (same as --rate-limit 0)"
|
||||
"--no-rate-limit",
|
||||
action="store_true",
|
||||
help="Disable rate limiting completely (same as --rate-limit 0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)"
|
||||
)
|
||||
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)")
|
||||
parser.add_argument("--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)")
|
||||
|
||||
return parser
|
||||
|
||||
@@ -1866,7 +1987,11 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
|
||||
"name": args.name,
|
||||
"description": args.description or f"Use when working with {args.name}",
|
||||
"base_url": args.url,
|
||||
"selectors": {"main_content": "div[role='main']", "title": "title", "code_blocks": "pre code"},
|
||||
"selectors": {
|
||||
"main_content": "div[role='main']",
|
||||
"title": "title",
|
||||
"code_blocks": "pre code",
|
||||
},
|
||||
"url_patterns": {"include": [], "exclude": []},
|
||||
"rate_limit": DEFAULT_RATE_LIMIT,
|
||||
"max_pages": DEFAULT_MAX_PAGES,
|
||||
@@ -1903,12 +2028,16 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
|
||||
if config.get("workers", 1) > 1:
|
||||
logger.info("⚡ Async mode enabled (2-3x faster than threads)")
|
||||
else:
|
||||
logger.warning("⚠️ Async mode enabled but workers=1. Consider using --workers 4 for better performance")
|
||||
logger.warning(
|
||||
"⚠️ Async mode enabled but workers=1. Consider using --workers 4 for better performance"
|
||||
)
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def execute_scraping_and_building(config: dict[str, Any], args: argparse.Namespace) -> Optional["DocToSkillConverter"]:
|
||||
def execute_scraping_and_building(
|
||||
config: dict[str, Any], args: argparse.Namespace
|
||||
) -> Optional["DocToSkillConverter"]:
|
||||
"""Execute the scraping and skill building process.
|
||||
|
||||
Handles dry run mode, existing data checks, scraping with checkpoints,
|
||||
@@ -1995,7 +2124,10 @@ def execute_scraping_and_building(config: dict[str, Any], args: argparse.Namespa
|
||||
if converter.checkpoint_enabled:
|
||||
converter.save_checkpoint()
|
||||
logger.info("💾 Progress saved to checkpoint")
|
||||
logger.info(" Resume with: --config %s --resume", args.config if args.config else "config.json")
|
||||
logger.info(
|
||||
" Resume with: --config %s --resume",
|
||||
args.config if args.config else "config.json",
|
||||
)
|
||||
response = input("Continue with skill building? (y/n): ").strip().lower()
|
||||
if response != "y":
|
||||
return None
|
||||
@@ -2086,7 +2218,9 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non
|
||||
logger.info(" or re-run with: --enhance-local")
|
||||
logger.info(" API-based: skill-seekers-enhance-api output/%s/", config["name"])
|
||||
logger.info(" or re-run with: --enhance")
|
||||
logger.info("\n💡 Tip: Use --interactive-enhancement with --enhance-local to open terminal window")
|
||||
logger.info(
|
||||
"\n💡 Tip: Use --interactive-enhancement with --enhance-local to open terminal window"
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
||||
@@ -41,7 +41,9 @@ class SkillEnhancer:
|
||||
self.skill_md_path = self.skill_dir / "SKILL.md"
|
||||
|
||||
# Get API key - support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN
|
||||
self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_AUTH_TOKEN")
|
||||
self.api_key = (
|
||||
api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_AUTH_TOKEN")
|
||||
)
|
||||
if not self.api_key:
|
||||
raise ValueError(
|
||||
"No API key provided. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN "
|
||||
@@ -174,7 +176,9 @@ This skill combines knowledge from {len(sources_found)} source type(s):
|
||||
if repo_id:
|
||||
prompt += f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n"
|
||||
else:
|
||||
prompt += f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
|
||||
prompt += (
|
||||
f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
|
||||
)
|
||||
prompt += f"```markdown\n{content}\n```\n"
|
||||
|
||||
prompt += """
|
||||
@@ -295,7 +299,9 @@ Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
|
||||
|
||||
# Read reference files
|
||||
print("📖 Reading reference documentation...")
|
||||
references = read_reference_files(self.skill_dir, max_chars=API_CONTENT_LIMIT, preview_limit=API_PREVIEW_LIMIT)
|
||||
references = read_reference_files(
|
||||
self.skill_dir, max_chars=API_CONTENT_LIMIT, preview_limit=API_PREVIEW_LIMIT
|
||||
)
|
||||
|
||||
if not references:
|
||||
print("❌ No reference files found to analyze")
|
||||
@@ -334,7 +340,9 @@ Return ONLY the complete SKILL.md content, starting with the frontmatter (---).
|
||||
print("\n✅ Enhancement complete!")
|
||||
print("\nNext steps:")
|
||||
print(f" 1. Review: {self.skill_md_path}")
|
||||
print(f" 2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}")
|
||||
print(
|
||||
f" 2. If you don't like it, restore backup: {self.skill_md_path.with_suffix('.md.backup')}"
|
||||
)
|
||||
print(" 3. Package your skill:")
|
||||
print(f" skill-seekers package {self.skill_dir}/")
|
||||
|
||||
@@ -367,15 +375,21 @@ Examples:
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument("skill_dir", type=str, help="Path to skill directory (e.g., output/steam-inventory/)")
|
||||
parser.add_argument("--api-key", type=str, help="Platform API key (or set environment variable)")
|
||||
parser.add_argument(
|
||||
"skill_dir", type=str, help="Path to skill directory (e.g., output/steam-inventory/)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--api-key", type=str, help="Platform API key (or set environment variable)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--target",
|
||||
choices=["claude", "gemini", "openai"],
|
||||
default="claude",
|
||||
help="Target LLM platform (default: claude)",
|
||||
)
|
||||
parser.add_argument("--dry-run", action="store_true", help="Show what would be done without calling API")
|
||||
parser.add_argument(
|
||||
"--dry-run", action="store_true", help="Show what would be done without calling API"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -447,7 +461,9 @@ Examples:
|
||||
print("\n✅ Enhancement complete!")
|
||||
print("\nNext steps:")
|
||||
print(f" 1. Review: {Path(skill_dir) / 'SKILL.md'}")
|
||||
print(f" 2. If you don't like it, restore backup: {Path(skill_dir) / 'SKILL.md.backup'}")
|
||||
print(
|
||||
f" 2. If you don't like it, restore backup: {Path(skill_dir) / 'SKILL.md.backup'}"
|
||||
)
|
||||
print(" 3. Package your skill:")
|
||||
print(f" skill-seekers package {skill_dir}/ --target {args.target}")
|
||||
|
||||
|
||||
@@ -216,7 +216,9 @@ class LocalSkillEnhancer:
|
||||
if use_summarization or total_ref_size > 30000:
|
||||
if not use_summarization:
|
||||
print(f" ⚠️ Large skill detected ({total_ref_size:,} chars)")
|
||||
print(f" 📊 Applying smart summarization (target: {int(summarization_ratio * 100)}% of original)")
|
||||
print(
|
||||
f" 📊 Applying smart summarization (target: {int(summarization_ratio * 100)}% of original)"
|
||||
)
|
||||
print()
|
||||
|
||||
# Summarize each reference
|
||||
@@ -307,7 +309,9 @@ REFERENCE DOCUMENTATION:
|
||||
if repo_id:
|
||||
prompt += f"*Source: {metadata['source']} ({repo_id}), Confidence: {metadata['confidence']}*\n\n"
|
||||
else:
|
||||
prompt += f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
|
||||
prompt += (
|
||||
f"*Source: {metadata['source']}, Confidence: {metadata['confidence']}*\n\n"
|
||||
)
|
||||
prompt += f"{content}\n"
|
||||
|
||||
prompt += f"""
|
||||
@@ -528,7 +532,9 @@ After writing, the file SKILL.md should:
|
||||
return False
|
||||
|
||||
# Save prompt to temp file
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
|
||||
with tempfile.NamedTemporaryFile(
|
||||
mode="w", suffix=".txt", delete=False, encoding="utf-8"
|
||||
) as f:
|
||||
prompt_file = f.name
|
||||
f.write(prompt)
|
||||
|
||||
@@ -605,7 +611,9 @@ rm {prompt_file}
|
||||
print(f" - Prompt file: {prompt_file}")
|
||||
print(f" - Skill directory: {self.skill_dir.absolute()}")
|
||||
print(f" - SKILL.md will be saved to: {self.skill_md_path.absolute()}")
|
||||
print(f" - Original backed up to: {self.skill_md_path.with_suffix('.md.backup').absolute()}")
|
||||
print(
|
||||
f" - Original backed up to: {self.skill_md_path.with_suffix('.md.backup').absolute()}"
|
||||
)
|
||||
print()
|
||||
print("⏳ Wait for Claude Code to finish in the other terminal...")
|
||||
print(" (Usually takes 30-60 seconds)")
|
||||
@@ -782,7 +790,9 @@ rm {prompt_file}
|
||||
return
|
||||
|
||||
# Save prompt to temp file
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, encoding="utf-8") as f:
|
||||
with tempfile.NamedTemporaryFile(
|
||||
mode="w", suffix=".txt", delete=False, encoding="utf-8"
|
||||
) as f:
|
||||
prompt_file = f.name
|
||||
f.write(prompt)
|
||||
|
||||
@@ -791,7 +801,9 @@ rm {prompt_file}
|
||||
# Run enhancement
|
||||
if headless:
|
||||
# Run headless (subprocess.run - blocking in thread)
|
||||
result = subprocess.run(["claude", prompt_file], capture_output=True, text=True, timeout=timeout)
|
||||
result = subprocess.run(
|
||||
["claude", prompt_file], capture_output=True, text=True, timeout=timeout
|
||||
)
|
||||
|
||||
# Clean up
|
||||
try:
|
||||
@@ -800,9 +812,13 @@ rm {prompt_file}
|
||||
pass
|
||||
|
||||
if result.returncode == 0:
|
||||
self.write_status("completed", "Enhancement completed successfully!", progress=1.0)
|
||||
self.write_status(
|
||||
"completed", "Enhancement completed successfully!", progress=1.0
|
||||
)
|
||||
else:
|
||||
self.write_status("failed", error=f"Claude returned error: {result.returncode}")
|
||||
self.write_status(
|
||||
"failed", error=f"Claude returned error: {result.returncode}"
|
||||
)
|
||||
else:
|
||||
# Terminal mode in background doesn't make sense
|
||||
self.write_status("failed", error="Terminal mode not supported in background")
|
||||
@@ -951,7 +967,10 @@ except Exception as e:
|
||||
# Normal mode: Log to file
|
||||
with open(log_file, "w") as log:
|
||||
subprocess.Popen(
|
||||
["nohup", "python3", str(daemon_script_path)], stdout=log, stderr=log, start_new_session=True
|
||||
["nohup", "python3", str(daemon_script_path)],
|
||||
stdout=log,
|
||||
stderr=log,
|
||||
start_new_session=True,
|
||||
)
|
||||
|
||||
# Give daemon time to start
|
||||
@@ -1033,10 +1052,14 @@ Force Mode (Default ON):
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--background", action="store_true", help="Run in background and return immediately (non-blocking)"
|
||||
"--background",
|
||||
action="store_true",
|
||||
help="Run in background and return immediately (non-blocking)",
|
||||
)
|
||||
|
||||
parser.add_argument("--daemon", action="store_true", help="Run as persistent daemon process (fully detached)")
|
||||
parser.add_argument(
|
||||
"--daemon", action="store_true", help="Run as persistent daemon process (fully detached)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--no-force",
|
||||
@@ -1045,7 +1068,10 @@ Force Mode (Default ON):
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--timeout", type=int, default=600, help="Timeout in seconds for headless mode (default: 600 = 10 minutes)"
|
||||
"--timeout",
|
||||
type=int,
|
||||
default=600,
|
||||
help="Timeout in seconds for headless mode (default: 600 = 10 minutes)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -1053,7 +1079,9 @@ Force Mode (Default ON):
|
||||
# Validate mutually exclusive options
|
||||
mode_count = sum([args.interactive_enhancement, args.background, args.daemon])
|
||||
if mode_count > 1:
|
||||
print("❌ Error: --interactive-enhancement, --background, and --daemon are mutually exclusive")
|
||||
print(
|
||||
"❌ Error: --interactive-enhancement, --background, and --daemon are mutually exclusive"
|
||||
)
|
||||
print(" Choose only one mode")
|
||||
sys.exit(1)
|
||||
|
||||
@@ -1061,7 +1089,9 @@ Force Mode (Default ON):
|
||||
# Force mode is ON by default, use --no-force to disable
|
||||
enhancer = LocalSkillEnhancer(args.skill_directory, force=not args.no_force)
|
||||
headless = not args.interactive_enhancement # Invert: default is headless
|
||||
success = enhancer.run(headless=headless, timeout=args.timeout, background=args.background, daemon=args.daemon)
|
||||
success = enhancer.run(
|
||||
headless=headless, timeout=args.timeout, background=args.background, daemon=args.daemon
|
||||
)
|
||||
|
||||
sys.exit(0 if success else 1)
|
||||
|
||||
|
||||
@@ -149,12 +149,17 @@ Examples:
|
||||
parser.add_argument("skill_directory", help="Path to skill directory (e.g., output/react/)")
|
||||
|
||||
parser.add_argument(
|
||||
"--watch", "-w", action="store_true", help="Watch status in real-time (updates every 2 seconds)"
|
||||
"--watch",
|
||||
"-w",
|
||||
action="store_true",
|
||||
help="Watch status in real-time (updates every 2 seconds)",
|
||||
)
|
||||
|
||||
parser.add_argument("--json", action="store_true", help="Output raw JSON (for scripting)")
|
||||
|
||||
parser.add_argument("--interval", type=int, default=2, help="Watch update interval in seconds (default: 2)")
|
||||
parser.add_argument(
|
||||
"--interval", type=int, default=2, help="Watch update interval in seconds (default: 2)"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
@@ -17,7 +17,11 @@ from bs4 import BeautifulSoup
|
||||
# Add parent directory to path for imports when run as script
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from skill_seekers.cli.constants import DEFAULT_MAX_DISCOVERY, DEFAULT_RATE_LIMIT, DISCOVERY_THRESHOLD
|
||||
from skill_seekers.cli.constants import (
|
||||
DEFAULT_MAX_DISCOVERY,
|
||||
DEFAULT_RATE_LIMIT,
|
||||
DISCOVERY_THRESHOLD,
|
||||
)
|
||||
|
||||
|
||||
def estimate_pages(config, max_discovery=DEFAULT_MAX_DISCOVERY, timeout=30):
|
||||
@@ -306,7 +310,12 @@ def list_all_configs():
|
||||
description = description[:57] + "..."
|
||||
|
||||
by_category[category].append(
|
||||
{"file": config_file.name, "path": str(rel_path), "name": name, "description": description}
|
||||
{
|
||||
"file": config_file.name,
|
||||
"path": str(rel_path),
|
||||
"name": name,
|
||||
"description": description,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
# If we can't parse the config, just use the filename
|
||||
@@ -366,7 +375,11 @@ Examples:
|
||||
)
|
||||
|
||||
parser.add_argument("config", nargs="?", help="Path to config JSON file")
|
||||
parser.add_argument("--all", action="store_true", help="List all available configs from api/configs_repo/official/")
|
||||
parser.add_argument(
|
||||
"--all",
|
||||
action="store_true",
|
||||
help="List all available configs from api/configs_repo/official/",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-discovery",
|
||||
"-m",
|
||||
@@ -380,7 +393,13 @@ Examples:
|
||||
action="store_true",
|
||||
help="Remove discovery limit - discover all pages (same as --max-discovery -1)",
|
||||
)
|
||||
parser.add_argument("--timeout", "-t", type=int, default=30, help="HTTP request timeout in seconds (default: 30)")
|
||||
parser.add_argument(
|
||||
"--timeout",
|
||||
"-t",
|
||||
type=int,
|
||||
default=30,
|
||||
help="HTTP request timeout in seconds (default: 30)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
@@ -35,7 +35,10 @@ class RouterGenerator:
|
||||
"""Generates router skills that direct to specialized sub-skills with GitHub integration"""
|
||||
|
||||
def __init__(
|
||||
self, config_paths: list[str], router_name: str = None, github_streams: Optional["ThreeStreamData"] = None
|
||||
self,
|
||||
config_paths: list[str],
|
||||
router_name: str = None,
|
||||
github_streams: Optional["ThreeStreamData"] = None,
|
||||
):
|
||||
"""
|
||||
Initialize router generator with optional GitHub streams.
|
||||
@@ -124,7 +127,10 @@ class RouterGenerator:
|
||||
label = label_info["label"].lower()
|
||||
|
||||
# Check if label relates to any skill keyword
|
||||
if any(keyword.lower() in label or label in keyword.lower() for keyword in skill_keywords):
|
||||
if any(
|
||||
keyword.lower() in label or label in keyword.lower()
|
||||
for keyword in skill_keywords
|
||||
):
|
||||
# Add twice for 2x weight
|
||||
keywords.append(label)
|
||||
keywords.append(label)
|
||||
@@ -217,9 +223,13 @@ class RouterGenerator:
|
||||
|
||||
if unique_topics:
|
||||
topics_str = ", ".join(unique_topics)
|
||||
description = f"{self.router_name.title()} framework. Use when working with: {topics_str}"
|
||||
description = (
|
||||
f"{self.router_name.title()} framework. Use when working with: {topics_str}"
|
||||
)
|
||||
else:
|
||||
description = f"Use when working with {self.router_name.title()} development and programming"
|
||||
description = (
|
||||
f"Use when working with {self.router_name.title()} development and programming"
|
||||
)
|
||||
|
||||
# Truncate to 200 chars for performance (agentskills.io recommendation)
|
||||
if len(description) > 200:
|
||||
@@ -357,7 +367,9 @@ compatibility: {compatibility}
|
||||
topic = self._extract_topic_from_skill(first_skill)
|
||||
keyword = first_keywords[0] if first_keywords else topic
|
||||
|
||||
examples.append(f'**Q:** "How do I implement {keyword}?"\n**A:** Activates {first_skill} skill')
|
||||
examples.append(
|
||||
f'**Q:** "How do I implement {keyword}?"\n**A:** Activates {first_skill} skill'
|
||||
)
|
||||
|
||||
# Example 2: Different skill (second sub-skill if available)
|
||||
if len(skill_names) >= 2:
|
||||
@@ -434,7 +446,9 @@ compatibility: {compatibility}
|
||||
f"**A:** Activates {skill_name} skill"
|
||||
)
|
||||
|
||||
return "\n\n".join(examples) if examples else self._generate_dynamic_examples(routing_keywords)
|
||||
return (
|
||||
"\n\n".join(examples) if examples else self._generate_dynamic_examples(routing_keywords)
|
||||
)
|
||||
|
||||
def _convert_issue_to_question(self, issue_title: str) -> str:
|
||||
"""
|
||||
@@ -492,7 +506,9 @@ compatibility: {compatibility}
|
||||
patterns = []
|
||||
|
||||
# Top 5 closed issues with most engagement (comments indicate usefulness)
|
||||
top_solutions = sorted(known_solutions, key=lambda x: x.get("comments", 0), reverse=True)[:5]
|
||||
top_solutions = sorted(known_solutions, key=lambda x: x.get("comments", 0), reverse=True)[
|
||||
:5
|
||||
]
|
||||
|
||||
for issue in top_solutions:
|
||||
title = issue.get("title", "")
|
||||
@@ -1000,8 +1016,12 @@ GitHub issues related to this topic:
|
||||
md = "# Common GitHub Issues\n\n"
|
||||
md += "Top issues reported by the community:\n\n"
|
||||
|
||||
common_problems = self.github_issues.get("common_problems", [])[:10] if self.github_issues else []
|
||||
known_solutions = self.github_issues.get("known_solutions", [])[:10] if self.github_issues else []
|
||||
common_problems = (
|
||||
self.github_issues.get("common_problems", [])[:10] if self.github_issues else []
|
||||
)
|
||||
known_solutions = (
|
||||
self.github_issues.get("known_solutions", [])[:10] if self.github_issues else []
|
||||
)
|
||||
|
||||
if common_problems:
|
||||
md += "## Open Issues (Common Problems)\n\n"
|
||||
|
||||
@@ -77,7 +77,11 @@ class GitHubThreeStreamFetcher:
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, repo_url: str, github_token: str | None = None, interactive: bool = True, profile_name: str | None = None
|
||||
self,
|
||||
repo_url: str,
|
||||
github_token: str | None = None,
|
||||
interactive: bool = True,
|
||||
profile_name: str | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize fetcher.
|
||||
@@ -412,7 +416,9 @@ class GitHubThreeStreamFetcher:
|
||||
continue
|
||||
|
||||
# Skip hidden files (but allow docs in docs/ directories)
|
||||
is_in_docs_dir = any(pattern in str(file_path) for pattern in ["docs/", "doc/", "documentation/"])
|
||||
is_in_docs_dir = any(
|
||||
pattern in str(file_path) for pattern in ["docs/", "doc/", "documentation/"]
|
||||
)
|
||||
if any(part.startswith(".") for part in file_path.parts):
|
||||
if not is_in_docs_dir:
|
||||
continue
|
||||
@@ -495,9 +501,15 @@ class GitHubThreeStreamFetcher:
|
||||
label_counts = Counter(all_labels)
|
||||
|
||||
return {
|
||||
"common_problems": sorted(common_problems, key=lambda x: x["comments"], reverse=True)[:10],
|
||||
"known_solutions": sorted(known_solutions, key=lambda x: x["comments"], reverse=True)[:10],
|
||||
"top_labels": [{"label": label, "count": count} for label, count in label_counts.most_common(10)],
|
||||
"common_problems": sorted(common_problems, key=lambda x: x["comments"], reverse=True)[
|
||||
:10
|
||||
],
|
||||
"known_solutions": sorted(known_solutions, key=lambda x: x["comments"], reverse=True)[
|
||||
:10
|
||||
],
|
||||
"top_labels": [
|
||||
{"label": label, "count": count} for label, count in label_counts.most_common(10)
|
||||
],
|
||||
}
|
||||
|
||||
def read_file(self, file_path: Path) -> str | None:
|
||||
|
||||
@@ -178,7 +178,9 @@ class GitHubScraper:
|
||||
self.repo_name = config["repo"]
|
||||
self.name = config.get("name", self.repo_name.split("/")[-1])
|
||||
# Set initial description (will be improved after README extraction if not in config)
|
||||
self.description = config.get("description", f"Use when working with {self.repo_name.split('/')[-1]}")
|
||||
self.description = config.get(
|
||||
"description", f"Use when working with {self.repo_name.split('/')[-1]}"
|
||||
)
|
||||
|
||||
# Local repository path (optional - enables unlimited analysis)
|
||||
self.local_repo_path = local_repo_path or config.get("local_repo_path")
|
||||
@@ -192,14 +194,18 @@ class GitHubScraper:
|
||||
# Option 1: Replace mode - Use only specified exclusions
|
||||
if "exclude_dirs" in config:
|
||||
self.excluded_dirs = set(config["exclude_dirs"])
|
||||
logger.warning(f"Using custom directory exclusions ({len(self.excluded_dirs)} dirs) - defaults overridden")
|
||||
logger.warning(
|
||||
f"Using custom directory exclusions ({len(self.excluded_dirs)} dirs) - defaults overridden"
|
||||
)
|
||||
logger.debug(f"Custom exclusions: {sorted(self.excluded_dirs)}")
|
||||
|
||||
# Option 2: Extend mode - Add to default exclusions
|
||||
elif "exclude_dirs_additional" in config:
|
||||
additional = set(config["exclude_dirs_additional"])
|
||||
self.excluded_dirs = self.excluded_dirs.union(additional)
|
||||
logger.info(f"Added {len(additional)} custom directory exclusions (total: {len(self.excluded_dirs)})")
|
||||
logger.info(
|
||||
f"Added {len(additional)} custom directory exclusions (total: {len(self.excluded_dirs)})"
|
||||
)
|
||||
logger.debug(f"Additional exclusions: {sorted(additional)}")
|
||||
|
||||
# Load .gitignore for additional exclusions (C2.1)
|
||||
@@ -218,7 +224,9 @@ class GitHubScraper:
|
||||
self.include_changelog = config.get("include_changelog", True)
|
||||
self.include_releases = config.get("include_releases", True)
|
||||
self.include_code = config.get("include_code", False)
|
||||
self.code_analysis_depth = config.get("code_analysis_depth", "surface") # 'surface', 'deep', 'full'
|
||||
self.code_analysis_depth = config.get(
|
||||
"code_analysis_depth", "surface"
|
||||
) # 'surface', 'deep', 'full'
|
||||
self.file_patterns = config.get("file_patterns", [])
|
||||
|
||||
# Initialize code analyzer if deep analysis requested
|
||||
@@ -261,7 +269,9 @@ class GitHubScraper:
|
||||
logger.warning("Using GitHub token from config file (less secure)")
|
||||
return token
|
||||
|
||||
logger.warning("No GitHub token provided - using unauthenticated access (lower rate limits)")
|
||||
logger.warning(
|
||||
"No GitHub token provided - using unauthenticated access (lower rate limits)"
|
||||
)
|
||||
return None
|
||||
|
||||
def scrape(self) -> dict[str, Any]:
|
||||
@@ -334,7 +344,9 @@ class GitHubScraper:
|
||||
"topics": self.repo.get_topics(),
|
||||
}
|
||||
|
||||
logger.info(f"Repository fetched: {self.repo.full_name} ({self.repo.stargazers_count} stars)")
|
||||
logger.info(
|
||||
f"Repository fetched: {self.repo.full_name} ({self.repo.stargazers_count} stars)"
|
||||
)
|
||||
|
||||
except GithubException as e:
|
||||
if e.status == 404:
|
||||
@@ -378,7 +390,9 @@ class GitHubScraper:
|
||||
file_size = getattr(content, "size", 0)
|
||||
|
||||
if download_url:
|
||||
logger.info(f"File {file_path} is large ({file_size:,} bytes), downloading via URL...")
|
||||
logger.info(
|
||||
f"File {file_path} is large ({file_size:,} bytes), downloading via URL..."
|
||||
)
|
||||
try:
|
||||
import requests
|
||||
|
||||
@@ -389,7 +403,9 @@ class GitHubScraper:
|
||||
logger.warning(f"Failed to download {file_path} from {download_url}: {e}")
|
||||
return None
|
||||
else:
|
||||
logger.warning(f"File {file_path} has no download URL (encoding={content.encoding})")
|
||||
logger.warning(
|
||||
f"File {file_path} has no download URL (encoding={content.encoding})"
|
||||
)
|
||||
return None
|
||||
|
||||
# Handle regular files - decode content
|
||||
@@ -419,7 +435,14 @@ class GitHubScraper:
|
||||
logger.info("Extracting README...")
|
||||
|
||||
# Try common README locations
|
||||
readme_files = ["README.md", "README.rst", "README.txt", "README", "docs/README.md", ".github/README.md"]
|
||||
readme_files = [
|
||||
"README.md",
|
||||
"README.rst",
|
||||
"README.txt",
|
||||
"README",
|
||||
"docs/README.md",
|
||||
".github/README.md",
|
||||
]
|
||||
|
||||
for readme_path in readme_files:
|
||||
readme_content = self._get_file_content(readme_path)
|
||||
@@ -429,7 +452,9 @@ class GitHubScraper:
|
||||
|
||||
# Update description if not explicitly set in config
|
||||
if "description" not in self.config:
|
||||
smart_description = extract_description_from_readme(self.extracted_data["readme"], self.repo_name)
|
||||
smart_description = extract_description_from_readme(
|
||||
self.extracted_data["readme"], self.repo_name
|
||||
)
|
||||
self.description = smart_description
|
||||
logger.debug(f"Generated description: {self.description}")
|
||||
|
||||
@@ -465,7 +490,9 @@ class GitHubScraper:
|
||||
self.extracted_data["languages"] = {
|
||||
lang: {
|
||||
"bytes": bytes_count,
|
||||
"percentage": round((bytes_count / total_bytes) * 100, 2) if total_bytes > 0 else 0,
|
||||
"percentage": round((bytes_count / total_bytes) * 100, 2)
|
||||
if total_bytes > 0
|
||||
else 0,
|
||||
}
|
||||
for lang, bytes_count in languages.items()
|
||||
}
|
||||
@@ -502,7 +529,9 @@ class GitHubScraper:
|
||||
# For directories, we need to check both with and without trailing slash
|
||||
# as .gitignore patterns can match either way
|
||||
dir_path_with_slash = dir_path if dir_path.endswith("/") else dir_path + "/"
|
||||
if self.gitignore_spec.match_file(dir_path) or self.gitignore_spec.match_file(dir_path_with_slash):
|
||||
if self.gitignore_spec.match_file(dir_path) or self.gitignore_spec.match_file(
|
||||
dir_path_with_slash
|
||||
):
|
||||
logger.debug(f"Directory excluded by .gitignore: {dir_path}")
|
||||
return True
|
||||
|
||||
@@ -555,7 +584,9 @@ class GitHubScraper:
|
||||
return
|
||||
|
||||
# Log exclusions for debugging
|
||||
logger.info(f"Directory exclusions ({len(self.excluded_dirs)} total): {sorted(list(self.excluded_dirs)[:10])}")
|
||||
logger.info(
|
||||
f"Directory exclusions ({len(self.excluded_dirs)} total): {sorted(list(self.excluded_dirs)[:10])}"
|
||||
)
|
||||
|
||||
file_tree = []
|
||||
excluded_count = 0
|
||||
@@ -594,7 +625,9 @@ class GitHubScraper:
|
||||
file_tree.append({"path": file_path, "type": "file", "size": file_size})
|
||||
|
||||
self.extracted_data["file_tree"] = file_tree
|
||||
logger.info(f"File tree built (local mode): {len(file_tree)} items ({excluded_count} directories excluded)")
|
||||
logger.info(
|
||||
f"File tree built (local mode): {len(file_tree)} items ({excluded_count} directories excluded)"
|
||||
)
|
||||
|
||||
def _extract_file_tree_github(self):
|
||||
"""Extract file tree from GitHub API (rate-limited)."""
|
||||
@@ -695,10 +728,16 @@ class GitHubScraper:
|
||||
file_content = self.repo.get_contents(file_path)
|
||||
content = file_content.decoded_content.decode("utf-8")
|
||||
|
||||
analysis_result = self.code_analyzer.analyze_file(file_path, content, primary_language)
|
||||
analysis_result = self.code_analyzer.analyze_file(
|
||||
file_path, content, primary_language
|
||||
)
|
||||
|
||||
if analysis_result and (analysis_result.get("classes") or analysis_result.get("functions")):
|
||||
analyzed_files.append({"file": file_path, "language": primary_language, **analysis_result})
|
||||
if analysis_result and (
|
||||
analysis_result.get("classes") or analysis_result.get("functions")
|
||||
):
|
||||
analyzed_files.append(
|
||||
{"file": file_path, "language": primary_language, **analysis_result}
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Analyzed {file_path}: "
|
||||
@@ -805,7 +844,9 @@ class GitHubScraper:
|
||||
"draft": release.draft,
|
||||
"prerelease": release.prerelease,
|
||||
"created_at": release.created_at.isoformat() if release.created_at else None,
|
||||
"published_at": release.published_at.isoformat() if release.published_at else None,
|
||||
"published_at": release.published_at.isoformat()
|
||||
if release.published_at
|
||||
else None,
|
||||
"url": release.html_url,
|
||||
"tarball_url": release.tarball_url,
|
||||
"zipball_url": release.zipball_url,
|
||||
@@ -973,13 +1014,21 @@ Use this skill when you need to:
|
||||
if has_c3_data:
|
||||
skill_content += "\n### Codebase Analysis References\n\n"
|
||||
if c3_data.get("patterns"):
|
||||
skill_content += "- `references/codebase_analysis/patterns/` - Design patterns detected\n"
|
||||
skill_content += (
|
||||
"- `references/codebase_analysis/patterns/` - Design patterns detected\n"
|
||||
)
|
||||
if c3_data.get("test_examples"):
|
||||
skill_content += "- `references/codebase_analysis/examples/` - Test examples extracted\n"
|
||||
skill_content += (
|
||||
"- `references/codebase_analysis/examples/` - Test examples extracted\n"
|
||||
)
|
||||
if c3_data.get("config_patterns"):
|
||||
skill_content += "- `references/codebase_analysis/configuration/` - Configuration analysis\n"
|
||||
skill_content += (
|
||||
"- `references/codebase_analysis/configuration/` - Configuration analysis\n"
|
||||
)
|
||||
if c3_data.get("architecture"):
|
||||
skill_content += "- `references/codebase_analysis/ARCHITECTURE.md` - Architecture overview\n"
|
||||
skill_content += (
|
||||
"- `references/codebase_analysis/ARCHITECTURE.md` - Architecture overview\n"
|
||||
)
|
||||
|
||||
# Usage
|
||||
skill_content += "\n## 💻 Usage\n\n"
|
||||
@@ -1020,7 +1069,9 @@ Use this skill when you need to:
|
||||
|
||||
lines = []
|
||||
for release in releases[:3]:
|
||||
lines.append(f"- **{release['tag_name']}** ({release['published_at'][:10]}): {release['name']}")
|
||||
lines.append(
|
||||
f"- **{release['tag_name']}** ({release['published_at'][:10]}): {release['name']}"
|
||||
)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
@@ -1132,7 +1183,9 @@ Use this skill when you need to:
|
||||
if patterns:
|
||||
content += "**Architectural Patterns:**\n"
|
||||
for pattern in patterns[:5]:
|
||||
content += f"- {pattern.get('name', 'Unknown')}: {pattern.get('description', 'N/A')}\n"
|
||||
content += (
|
||||
f"- {pattern.get('name', 'Unknown')}: {pattern.get('description', 'N/A')}\n"
|
||||
)
|
||||
content += "\n"
|
||||
|
||||
# Dependencies (C2.6)
|
||||
@@ -1233,7 +1286,9 @@ Use this skill when you need to:
|
||||
"""Generate releases.md reference file."""
|
||||
releases = self.data["releases"]
|
||||
|
||||
content = f"# Releases\n\nVersion history for this repository ({len(releases)} releases).\n\n"
|
||||
content = (
|
||||
f"# Releases\n\nVersion history for this repository ({len(releases)} releases).\n\n"
|
||||
)
|
||||
|
||||
for release in releases:
|
||||
content += f"## {release['tag_name']}: {release['name']}\n"
|
||||
@@ -1294,14 +1349,22 @@ Examples:
|
||||
parser.add_argument("--max-issues", type=int, default=100, help="Max issues to fetch")
|
||||
parser.add_argument("--scrape-only", action="store_true", help="Only scrape, don't build skill")
|
||||
parser.add_argument(
|
||||
"--enhance", action="store_true", help="Enhance SKILL.md using Claude API after building (requires API key)"
|
||||
"--enhance",
|
||||
action="store_true",
|
||||
help="Enhance SKILL.md using Claude API after building (requires API key)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enhance-local", action="store_true", help="Enhance SKILL.md using Claude Code (no API key needed)"
|
||||
"--enhance-local",
|
||||
action="store_true",
|
||||
help="Enhance SKILL.md using Claude Code (no API key needed)",
|
||||
)
|
||||
parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)")
|
||||
parser.add_argument(
|
||||
"--non-interactive", action="store_true", help="Non-interactive mode for CI/CD (fail fast on rate limits)"
|
||||
"--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--non-interactive",
|
||||
action="store_true",
|
||||
help="Non-interactive mode for CI/CD (fail fast on rate limits)",
|
||||
)
|
||||
parser.add_argument("--profile", type=str, help="GitHub profile name to use from config")
|
||||
|
||||
@@ -1368,7 +1431,9 @@ Examples:
|
||||
|
||||
api_key = args.api_key or os.environ.get("ANTHROPIC_API_KEY")
|
||||
if not api_key:
|
||||
logger.error("❌ ANTHROPIC_API_KEY not set. Use --api-key or set environment variable.")
|
||||
logger.error(
|
||||
"❌ ANTHROPIC_API_KEY not set. Use --api-key or set environment variable."
|
||||
)
|
||||
logger.info("💡 Tip: Use --enhance-local instead (no API key needed)")
|
||||
else:
|
||||
# Import and run API enhancement
|
||||
@@ -1378,7 +1443,9 @@ Examples:
|
||||
enhance_skill_md(skill_dir, api_key)
|
||||
logger.info("✅ API enhancement complete!")
|
||||
except ImportError:
|
||||
logger.error("❌ API enhancement not available. Install: pip install anthropic")
|
||||
logger.error(
|
||||
"❌ API enhancement not available. Install: pip install anthropic"
|
||||
)
|
||||
logger.info("💡 Tip: Use --enhance-local instead (no API key needed)")
|
||||
|
||||
logger.info(f"\n✅ Success! Skill created at: {skill_dir}/")
|
||||
|
||||
@@ -92,7 +92,9 @@ class GuideEnhancer:
|
||||
self.client = anthropic.Anthropic(api_key=self.api_key)
|
||||
logger.info("✨ GuideEnhancer initialized in API mode")
|
||||
else:
|
||||
logger.warning("⚠️ API mode requested but anthropic library not available or no API key")
|
||||
logger.warning(
|
||||
"⚠️ API mode requested but anthropic library not available or no API key"
|
||||
)
|
||||
self.mode = "none"
|
||||
elif self.mode == "local":
|
||||
# Check if claude CLI is available
|
||||
@@ -133,7 +135,9 @@ class GuideEnhancer:
|
||||
def _check_claude_cli(self) -> bool:
|
||||
"""Check if Claude Code CLI is available."""
|
||||
try:
|
||||
result = subprocess.run(["claude", "--version"], capture_output=True, text=True, timeout=5)
|
||||
result = subprocess.run(
|
||||
["claude", "--version"], capture_output=True, text=True, timeout=5
|
||||
)
|
||||
return result.returncode == 0
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
return False
|
||||
@@ -251,7 +255,9 @@ class GuideEnhancer:
|
||||
try:
|
||||
data = json.loads(response)
|
||||
return [
|
||||
PrerequisiteItem(name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", ""))
|
||||
PrerequisiteItem(
|
||||
name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", "")
|
||||
)
|
||||
for item in data.get("prerequisites_detailed", [])
|
||||
]
|
||||
except (json.JSONDecodeError, KeyError) as e:
|
||||
@@ -345,7 +351,9 @@ class GuideEnhancer:
|
||||
|
||||
try:
|
||||
response = self.client.messages.create(
|
||||
model="claude-sonnet-4-20250514", max_tokens=max_tokens, messages=[{"role": "user", "content": prompt}]
|
||||
model="claude-sonnet-4-20250514",
|
||||
max_tokens=max_tokens,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
return response.content[0].text
|
||||
except Exception as e:
|
||||
@@ -690,7 +698,11 @@ IMPORTANT: Return ONLY valid JSON.
|
||||
# Prerequisites
|
||||
if "prerequisites_detailed" in data:
|
||||
enhanced["prerequisites_detailed"] = [
|
||||
PrerequisiteItem(name=item.get("name", ""), why=item.get("why", ""), setup=item.get("setup", ""))
|
||||
PrerequisiteItem(
|
||||
name=item.get("name", ""),
|
||||
why=item.get("why", ""),
|
||||
setup=item.get("setup", ""),
|
||||
)
|
||||
for item in data["prerequisites_detailed"]
|
||||
]
|
||||
|
||||
|
||||
@@ -140,7 +140,9 @@ class GuideCollection:
|
||||
return {
|
||||
"total_guides": self.total_guides,
|
||||
"guides_by_complexity": self.guides_by_complexity,
|
||||
"guides_by_use_case": {k: [g.to_dict() for g in v] for k, v in self.guides_by_use_case.items()},
|
||||
"guides_by_use_case": {
|
||||
k: [g.to_dict() for g in v] for k, v in self.guides_by_use_case.items()
|
||||
},
|
||||
"guides": [g.to_dict() for g in self.guides],
|
||||
}
|
||||
|
||||
@@ -224,7 +226,10 @@ class WorkflowAnalyzer:
|
||||
|
||||
steps.append(
|
||||
WorkflowStep(
|
||||
step_number=step_num, code=step_code, description=description, verification=verification
|
||||
step_number=step_num,
|
||||
code=step_code,
|
||||
description=description,
|
||||
verification=verification,
|
||||
)
|
||||
)
|
||||
step_num += 1
|
||||
@@ -253,7 +258,9 @@ class WorkflowAnalyzer:
|
||||
step_code = "\n".join(current_step)
|
||||
description = self._infer_description_from_code(step_code)
|
||||
|
||||
steps.append(WorkflowStep(step_number=step_num, code=step_code, description=description))
|
||||
steps.append(
|
||||
WorkflowStep(step_number=step_num, code=step_code, description=description)
|
||||
)
|
||||
step_num += 1
|
||||
current_step = []
|
||||
continue
|
||||
@@ -264,7 +271,9 @@ class WorkflowAnalyzer:
|
||||
if current_step:
|
||||
step_code = "\n".join(current_step)
|
||||
description = self._infer_description_from_code(step_code)
|
||||
steps.append(WorkflowStep(step_number=step_num, code=step_code, description=description))
|
||||
steps.append(
|
||||
WorkflowStep(step_number=step_num, code=step_code, description=description)
|
||||
)
|
||||
|
||||
return steps
|
||||
|
||||
@@ -400,7 +409,9 @@ class WorkflowAnalyzer:
|
||||
class WorkflowGrouper:
|
||||
"""Group related workflows into coherent guides"""
|
||||
|
||||
def group_workflows(self, workflows: list[dict], strategy: str = "ai-tutorial-group") -> dict[str, list[dict]]:
|
||||
def group_workflows(
|
||||
self, workflows: list[dict], strategy: str = "ai-tutorial-group"
|
||||
) -> dict[str, list[dict]]:
|
||||
"""
|
||||
Group workflows using specified strategy.
|
||||
|
||||
@@ -854,7 +865,9 @@ class HowToGuideBuilder:
|
||||
|
||||
if not workflows:
|
||||
logger.warning("No workflow examples found!")
|
||||
return GuideCollection(total_guides=0, guides_by_complexity={}, guides_by_use_case={}, guides=[])
|
||||
return GuideCollection(
|
||||
total_guides=0, guides_by_complexity={}, guides_by_use_case={}, guides=[]
|
||||
)
|
||||
|
||||
# Group workflows
|
||||
grouped_workflows = self.grouper.group_workflows(workflows, grouping_strategy)
|
||||
@@ -914,7 +927,9 @@ class HowToGuideBuilder:
|
||||
|
||||
# Extract source files
|
||||
source_files = [w.get("file_path", "") for w in workflows]
|
||||
source_files = [f"{Path(f).name}:{w.get('line_start', 0)}" for f, w in zip(source_files, workflows)]
|
||||
source_files = [
|
||||
f"{Path(f).name}:{w.get('line_start', 0)}" for f, w in zip(source_files, workflows)
|
||||
]
|
||||
|
||||
# Create guide
|
||||
guide = HowToGuide(
|
||||
@@ -1126,9 +1141,13 @@ Grouping Strategies:
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument("input", nargs="?", help="Input: directory with test files OR test_examples.json file")
|
||||
parser.add_argument(
|
||||
"input", nargs="?", help="Input: directory with test files OR test_examples.json file"
|
||||
)
|
||||
|
||||
parser.add_argument("--input", dest="input_file", help="Input JSON file with test examples (from C3.2)")
|
||||
parser.add_argument(
|
||||
"--input", dest="input_file", help="Input JSON file with test examples (from C3.2)"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
@@ -1145,7 +1164,9 @@ Grouping Strategies:
|
||||
|
||||
parser.add_argument("--no-ai", action="store_true", help="Disable AI enhancement")
|
||||
|
||||
parser.add_argument("--json-output", action="store_true", help="Output JSON summary instead of markdown files")
|
||||
parser.add_argument(
|
||||
"--json-output", action="store_true", help="Output JSON summary instead of markdown files"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -1191,7 +1212,9 @@ Grouping Strategies:
|
||||
builder = HowToGuideBuilder(enhance_with_ai=not args.no_ai)
|
||||
output_dir = Path(args.output) if not args.json_output else None
|
||||
|
||||
collection = builder.build_guides_from_examples(examples, grouping_strategy=args.group_by, output_dir=output_dir)
|
||||
collection = builder.build_guides_from_examples(
|
||||
examples, grouping_strategy=args.group_by, output_dir=output_dir
|
||||
)
|
||||
|
||||
# Output results
|
||||
if args.json_output:
|
||||
|
||||
@@ -366,11 +366,17 @@ Supported agents:
|
||||
|
||||
parser.add_argument("skill_directory", help="Path to skill directory (e.g., output/react/)")
|
||||
|
||||
parser.add_argument("--agent", required=True, help="Agent name (use 'all' to install to all agents)")
|
||||
parser.add_argument(
|
||||
"--agent", required=True, help="Agent name (use 'all' to install to all agents)"
|
||||
)
|
||||
|
||||
parser.add_argument("--force", action="store_true", help="Overwrite existing installation without asking")
|
||||
parser.add_argument(
|
||||
"--force", action="store_true", help="Overwrite existing installation without asking"
|
||||
)
|
||||
|
||||
parser.add_argument("--dry-run", action="store_true", help="Preview installation without making changes")
|
||||
parser.add_argument(
|
||||
"--dry-run", action="store_true", help="Preview installation without making changes"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -442,7 +448,9 @@ Supported agents:
|
||||
if args.dry_run:
|
||||
print("\n🔍 DRY RUN MODE - No changes will be made\n")
|
||||
|
||||
success, message = install_to_agent(skill_dir, agent_name, force=args.force, dry_run=args.dry_run)
|
||||
success, message = install_to_agent(
|
||||
skill_dir, agent_name, force=args.force, dry_run=args.dry_run
|
||||
)
|
||||
|
||||
print(message)
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
# Import the MCP tool function (with lazy loading)
|
||||
try:
|
||||
from skill_seekers.mcp.server import install_skill_tool
|
||||
|
||||
MCP_AVAILABLE = True
|
||||
except ImportError:
|
||||
MCP_AVAILABLE = False
|
||||
@@ -99,15 +100,23 @@ Phases:
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--config", required=True, help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')"
|
||||
"--config",
|
||||
required=True,
|
||||
help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')",
|
||||
)
|
||||
|
||||
parser.add_argument("--destination", default="output", help="Output directory for skill files (default: output/)")
|
||||
parser.add_argument(
|
||||
"--destination",
|
||||
default="output",
|
||||
help="Output directory for skill files (default: output/)",
|
||||
)
|
||||
|
||||
parser.add_argument("--no-upload", action="store_true", help="Skip automatic upload to Claude")
|
||||
|
||||
parser.add_argument(
|
||||
"--unlimited", action="store_true", help="Remove page limits during scraping (WARNING: Can take hours)"
|
||||
"--unlimited",
|
||||
action="store_true",
|
||||
help="Remove page limits during scraping (WARNING: Can take hours)",
|
||||
)
|
||||
|
||||
parser.add_argument("--dry-run", action="store_true", help="Preview workflow without executing")
|
||||
|
||||
@@ -17,10 +17,15 @@ logger = logging.getLogger(__name__)
|
||||
try:
|
||||
from skill_seekers.cli.swift_patterns import SWIFT_PATTERNS
|
||||
except ImportError as e:
|
||||
logger.warning("Swift language detection patterns unavailable. Swift code detection will be disabled. Error: %s", e)
|
||||
logger.warning(
|
||||
"Swift language detection patterns unavailable. Swift code detection will be disabled. Error: %s",
|
||||
e,
|
||||
)
|
||||
SWIFT_PATTERNS: dict[str, list[tuple[str, int]]] = {}
|
||||
except Exception as e:
|
||||
logger.error("Failed to load Swift patterns due to unexpected error: %s. Swift detection disabled.", e)
|
||||
logger.error(
|
||||
"Failed to load Swift patterns due to unexpected error: %s. Swift detection disabled.", e
|
||||
)
|
||||
SWIFT_PATTERNS: dict[str, list[tuple[str, int]]] = {}
|
||||
|
||||
# Verify Swift patterns were loaded correctly
|
||||
@@ -35,7 +40,8 @@ elif "swift" not in SWIFT_PATTERNS:
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Swift patterns loaded successfully: %d patterns for language detection", len(SWIFT_PATTERNS.get("swift", []))
|
||||
"Swift patterns loaded successfully: %d patterns for language detection",
|
||||
len(SWIFT_PATTERNS.get("swift", [])),
|
||||
)
|
||||
|
||||
# Comprehensive language patterns with weighted confidence scoring
|
||||
@@ -473,7 +479,8 @@ class LanguageDetector:
|
||||
self._pattern_cache[lang] = compiled_patterns
|
||||
else:
|
||||
logger.warning(
|
||||
"No valid patterns compiled for language '%s'. Detection for this language is disabled.", lang
|
||||
"No valid patterns compiled for language '%s'. Detection for this language is disabled.",
|
||||
lang,
|
||||
)
|
||||
|
||||
def detect_from_html(self, elem, code: str) -> tuple[str, float]:
|
||||
|
||||
@@ -98,7 +98,9 @@ class LlmsTxtDownloader:
|
||||
print(f" Retrying in {delay}s...")
|
||||
time.sleep(delay)
|
||||
else:
|
||||
print(f"❌ Failed to download {self.url} after {self.max_retries} attempts: {e}")
|
||||
print(
|
||||
f"❌ Failed to download {self.url} after {self.max_retries} attempts: {e}"
|
||||
)
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
@@ -135,7 +135,11 @@ class LlmsTxtParser:
|
||||
headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE)
|
||||
for level_markers, text in headings:
|
||||
page["headings"].append(
|
||||
{"level": f"h{len(level_markers)}", "text": text.strip(), "id": text.lower().replace(" ", "-")}
|
||||
{
|
||||
"level": f"h{len(level_markers)}",
|
||||
"text": text.strip(),
|
||||
"id": text.lower().replace(" ", "-"),
|
||||
}
|
||||
)
|
||||
|
||||
# Remove code blocks from content for plain text
|
||||
|
||||
@@ -66,52 +66,79 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
||||
parser.add_argument("--version", action="version", version="%(prog)s 2.7.0")
|
||||
|
||||
subparsers = parser.add_subparsers(
|
||||
dest="command", title="commands", description="Available Skill Seekers commands", help="Command to run"
|
||||
dest="command",
|
||||
title="commands",
|
||||
description="Available Skill Seekers commands",
|
||||
help="Command to run",
|
||||
)
|
||||
|
||||
# === config subcommand ===
|
||||
config_parser = subparsers.add_parser(
|
||||
"config", help="Configure GitHub tokens, API keys, and settings", description="Interactive configuration wizard"
|
||||
"config",
|
||||
help="Configure GitHub tokens, API keys, and settings",
|
||||
description="Interactive configuration wizard",
|
||||
)
|
||||
config_parser.add_argument(
|
||||
"--github", action="store_true", help="Go directly to GitHub token setup"
|
||||
)
|
||||
config_parser.add_argument(
|
||||
"--api-keys", action="store_true", help="Go directly to API keys setup"
|
||||
)
|
||||
config_parser.add_argument(
|
||||
"--show", action="store_true", help="Show current configuration and exit"
|
||||
)
|
||||
config_parser.add_argument("--github", action="store_true", help="Go directly to GitHub token setup")
|
||||
config_parser.add_argument("--api-keys", action="store_true", help="Go directly to API keys setup")
|
||||
config_parser.add_argument("--show", action="store_true", help="Show current configuration and exit")
|
||||
config_parser.add_argument("--test", action="store_true", help="Test connections and exit")
|
||||
|
||||
# === scrape subcommand ===
|
||||
scrape_parser = subparsers.add_parser(
|
||||
"scrape", help="Scrape documentation website", description="Scrape documentation website and generate skill"
|
||||
"scrape",
|
||||
help="Scrape documentation website",
|
||||
description="Scrape documentation website and generate skill",
|
||||
)
|
||||
scrape_parser.add_argument("--config", help="Config JSON file")
|
||||
scrape_parser.add_argument("--name", help="Skill name")
|
||||
scrape_parser.add_argument("--url", help="Documentation URL")
|
||||
scrape_parser.add_argument("--description", help="Skill description")
|
||||
scrape_parser.add_argument("--skip-scrape", action="store_true", help="Skip scraping, use cached data")
|
||||
scrape_parser.add_argument(
|
||||
"--skip-scrape", action="store_true", help="Skip scraping, use cached data"
|
||||
)
|
||||
scrape_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
|
||||
scrape_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)")
|
||||
scrape_parser.add_argument(
|
||||
"--enhance-local", action="store_true", help="AI enhancement (local)"
|
||||
)
|
||||
scrape_parser.add_argument("--dry-run", action="store_true", help="Dry run mode")
|
||||
scrape_parser.add_argument("--async", dest="async_mode", action="store_true", help="Use async scraping")
|
||||
scrape_parser.add_argument(
|
||||
"--async", dest="async_mode", action="store_true", help="Use async scraping"
|
||||
)
|
||||
scrape_parser.add_argument("--workers", type=int, help="Number of async workers")
|
||||
|
||||
# === github subcommand ===
|
||||
github_parser = subparsers.add_parser(
|
||||
"github", help="Scrape GitHub repository", description="Scrape GitHub repository and generate skill"
|
||||
"github",
|
||||
help="Scrape GitHub repository",
|
||||
description="Scrape GitHub repository and generate skill",
|
||||
)
|
||||
github_parser.add_argument("--config", help="Config JSON file")
|
||||
github_parser.add_argument("--repo", help="GitHub repo (owner/repo)")
|
||||
github_parser.add_argument("--name", help="Skill name")
|
||||
github_parser.add_argument("--description", help="Skill description")
|
||||
github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
|
||||
github_parser.add_argument("--enhance-local", action="store_true", help="AI enhancement (local)")
|
||||
github_parser.add_argument(
|
||||
"--enhance-local", action="store_true", help="AI enhancement (local)"
|
||||
)
|
||||
github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance")
|
||||
github_parser.add_argument(
|
||||
"--non-interactive", action="store_true", help="Non-interactive mode (fail fast on rate limits)"
|
||||
"--non-interactive",
|
||||
action="store_true",
|
||||
help="Non-interactive mode (fail fast on rate limits)",
|
||||
)
|
||||
github_parser.add_argument("--profile", type=str, help="GitHub profile name from config")
|
||||
|
||||
# === pdf subcommand ===
|
||||
pdf_parser = subparsers.add_parser(
|
||||
"pdf", help="Extract from PDF file", description="Extract content from PDF and generate skill"
|
||||
"pdf",
|
||||
help="Extract from PDF file",
|
||||
description="Extract content from PDF and generate skill",
|
||||
)
|
||||
pdf_parser.add_argument("--config", help="Config JSON file")
|
||||
pdf_parser.add_argument("--pdf", help="PDF file path")
|
||||
@@ -138,7 +165,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
||||
enhance_parser.add_argument("skill_directory", help="Skill directory path")
|
||||
enhance_parser.add_argument("--background", action="store_true", help="Run in background")
|
||||
enhance_parser.add_argument("--daemon", action="store_true", help="Run as daemon")
|
||||
enhance_parser.add_argument("--no-force", action="store_true", help="Disable force mode (enable confirmations)")
|
||||
enhance_parser.add_argument(
|
||||
"--no-force", action="store_true", help="Disable force mode (enable confirmations)"
|
||||
)
|
||||
enhance_parser.add_argument("--timeout", type=int, default=600, help="Timeout in seconds")
|
||||
|
||||
# === enhance-status subcommand ===
|
||||
@@ -148,13 +177,19 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
||||
description="Monitor background enhancement processes",
|
||||
)
|
||||
enhance_status_parser.add_argument("skill_directory", help="Skill directory path")
|
||||
enhance_status_parser.add_argument("--watch", "-w", action="store_true", help="Watch in real-time")
|
||||
enhance_status_parser.add_argument(
|
||||
"--watch", "-w", action="store_true", help="Watch in real-time"
|
||||
)
|
||||
enhance_status_parser.add_argument("--json", action="store_true", help="JSON output")
|
||||
enhance_status_parser.add_argument("--interval", type=int, default=2, help="Watch interval in seconds")
|
||||
enhance_status_parser.add_argument(
|
||||
"--interval", type=int, default=2, help="Watch interval in seconds"
|
||||
)
|
||||
|
||||
# === package subcommand ===
|
||||
package_parser = subparsers.add_parser(
|
||||
"package", help="Package skill into .zip file", description="Package skill directory into uploadable .zip"
|
||||
"package",
|
||||
help="Package skill into .zip file",
|
||||
description="Package skill directory into uploadable .zip",
|
||||
)
|
||||
package_parser.add_argument("skill_directory", help="Skill directory path")
|
||||
package_parser.add_argument("--no-open", action="store_true", help="Don't open output folder")
|
||||
@@ -162,7 +197,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
||||
|
||||
# === upload subcommand ===
|
||||
upload_parser = subparsers.add_parser(
|
||||
"upload", help="Upload skill to Claude", description="Upload .zip file to Claude via Anthropic API"
|
||||
"upload",
|
||||
help="Upload skill to Claude",
|
||||
description="Upload .zip file to Claude via Anthropic API",
|
||||
)
|
||||
upload_parser.add_argument("zip_file", help=".zip file to upload")
|
||||
upload_parser.add_argument("--api-key", help="Anthropic API key")
|
||||
@@ -183,17 +220,26 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
||||
help="Extract usage examples from test files",
|
||||
description="Analyze test files to extract real API usage patterns",
|
||||
)
|
||||
test_examples_parser.add_argument("directory", nargs="?", help="Directory containing test files")
|
||||
test_examples_parser.add_argument("--file", help="Single test file to analyze")
|
||||
test_examples_parser.add_argument("--language", help="Filter by programming language (python, javascript, etc.)")
|
||||
test_examples_parser.add_argument(
|
||||
"--min-confidence", type=float, default=0.5, help="Minimum confidence threshold (0.0-1.0, default: 0.5)"
|
||||
"directory", nargs="?", help="Directory containing test files"
|
||||
)
|
||||
test_examples_parser.add_argument("--file", help="Single test file to analyze")
|
||||
test_examples_parser.add_argument(
|
||||
"--language", help="Filter by programming language (python, javascript, etc.)"
|
||||
)
|
||||
test_examples_parser.add_argument(
|
||||
"--min-confidence",
|
||||
type=float,
|
||||
default=0.5,
|
||||
help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
|
||||
)
|
||||
test_examples_parser.add_argument(
|
||||
"--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)"
|
||||
)
|
||||
test_examples_parser.add_argument("--json", action="store_true", help="Output JSON format")
|
||||
test_examples_parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
|
||||
test_examples_parser.add_argument(
|
||||
"--markdown", action="store_true", help="Output Markdown format"
|
||||
)
|
||||
|
||||
# === install-agent subcommand ===
|
||||
install_agent_parser = subparsers.add_parser(
|
||||
@@ -201,9 +247,13 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
||||
help="Install skill to AI agent directories",
|
||||
description="Copy skill to agent-specific installation directories",
|
||||
)
|
||||
install_agent_parser.add_argument("skill_directory", help="Skill directory path (e.g., output/react/)")
|
||||
install_agent_parser.add_argument(
|
||||
"--agent", required=True, help="Agent name (claude, cursor, vscode, amp, goose, opencode, all)"
|
||||
"skill_directory", help="Skill directory path (e.g., output/react/)"
|
||||
)
|
||||
install_agent_parser.add_argument(
|
||||
"--agent",
|
||||
required=True,
|
||||
help="Agent name (claude, cursor, vscode, amp, goose, opencode, all)",
|
||||
)
|
||||
install_agent_parser.add_argument(
|
||||
"--force", action="store_true", help="Overwrite existing installation without asking"
|
||||
@@ -219,18 +269,32 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
||||
description="One-command skill installation (AI enhancement MANDATORY)",
|
||||
)
|
||||
install_parser.add_argument(
|
||||
"--config", required=True, help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')"
|
||||
"--config",
|
||||
required=True,
|
||||
help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')",
|
||||
)
|
||||
install_parser.add_argument(
|
||||
"--destination", default="output", help="Output directory (default: output/)"
|
||||
)
|
||||
install_parser.add_argument(
|
||||
"--no-upload", action="store_true", help="Skip automatic upload to Claude"
|
||||
)
|
||||
install_parser.add_argument(
|
||||
"--unlimited", action="store_true", help="Remove page limits during scraping"
|
||||
)
|
||||
install_parser.add_argument(
|
||||
"--dry-run", action="store_true", help="Preview workflow without executing"
|
||||
)
|
||||
install_parser.add_argument("--destination", default="output", help="Output directory (default: output/)")
|
||||
install_parser.add_argument("--no-upload", action="store_true", help="Skip automatic upload to Claude")
|
||||
install_parser.add_argument("--unlimited", action="store_true", help="Remove page limits during scraping")
|
||||
install_parser.add_argument("--dry-run", action="store_true", help="Preview workflow without executing")
|
||||
|
||||
# === resume subcommand ===
|
||||
resume_parser = subparsers.add_parser(
|
||||
"resume", help="Resume interrupted scraping job", description="Continue from saved progress checkpoint"
|
||||
"resume",
|
||||
help="Resume interrupted scraping job",
|
||||
description="Continue from saved progress checkpoint",
|
||||
)
|
||||
resume_parser.add_argument(
|
||||
"job_id", nargs="?", help="Job ID to resume (or use --list to see available jobs)"
|
||||
)
|
||||
resume_parser.add_argument("job_id", nargs="?", help="Job ID to resume (or use --list to see available jobs)")
|
||||
resume_parser.add_argument("--list", action="store_true", help="List all resumable jobs")
|
||||
resume_parser.add_argument("--clean", action="store_true", help="Clean up old progress files")
|
||||
|
||||
|
||||
@@ -38,7 +38,9 @@ logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def categorize_issues_by_topic(problems: list[dict], solutions: list[dict], topics: list[str]) -> dict[str, list[dict]]:
|
||||
def categorize_issues_by_topic(
|
||||
problems: list[dict], solutions: list[dict], topics: list[str]
|
||||
) -> dict[str, list[dict]]:
|
||||
"""
|
||||
Categorize GitHub issues by topic keywords.
|
||||
|
||||
@@ -85,7 +87,10 @@ def categorize_issues_by_topic(problems: list[dict], solutions: list[dict], topi
|
||||
|
||||
|
||||
def generate_hybrid_content(
|
||||
api_data: dict, github_docs: dict | None, github_insights: dict | None, conflicts: list[Conflict]
|
||||
api_data: dict,
|
||||
github_docs: dict | None,
|
||||
github_insights: dict | None,
|
||||
conflicts: list[Conflict],
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Generate hybrid content combining API data with GitHub context.
|
||||
@@ -133,7 +138,11 @@ def generate_hybrid_content(
|
||||
hybrid["github_context"]["top_labels"] = github_insights.get("top_labels", [])
|
||||
|
||||
# Add conflict summary
|
||||
hybrid["conflict_summary"] = {"total_conflicts": len(conflicts), "by_type": {}, "by_severity": {}}
|
||||
hybrid["conflict_summary"] = {
|
||||
"total_conflicts": len(conflicts),
|
||||
"by_type": {},
|
||||
"by_severity": {},
|
||||
}
|
||||
|
||||
for conflict in conflicts:
|
||||
# Count by type
|
||||
@@ -159,7 +168,9 @@ def generate_hybrid_content(
|
||||
return hybrid
|
||||
|
||||
|
||||
def _match_issues_to_apis(apis: dict[str, dict], problems: list[dict], solutions: list[dict]) -> dict[str, list[dict]]:
|
||||
def _match_issues_to_apis(
|
||||
apis: dict[str, dict], problems: list[dict], solutions: list[dict]
|
||||
) -> dict[str, list[dict]]:
|
||||
"""
|
||||
Match GitHub issues to specific APIs by keyword matching.
|
||||
|
||||
@@ -651,7 +662,12 @@ read -p "Press Enter when merge is complete..."
|
||||
|
||||
# Open new terminal with Claude Code
|
||||
# Try different terminal emulators
|
||||
terminals = [["x-terminal-emulator", "-e"], ["gnome-terminal", "--"], ["xterm", "-e"], ["konsole", "-e"]]
|
||||
terminals = [
|
||||
["x-terminal-emulator", "-e"],
|
||||
["gnome-terminal", "--"],
|
||||
["xterm", "-e"],
|
||||
["konsole", "-e"],
|
||||
]
|
||||
|
||||
for terminal_cmd in terminals:
|
||||
try:
|
||||
@@ -735,7 +751,9 @@ def merge_sources(
|
||||
if github_streams:
|
||||
logger.info("GitHub streams available for multi-layer merge")
|
||||
if github_streams.docs_stream:
|
||||
logger.info(f" - Docs stream: README, {len(github_streams.docs_stream.docs_files)} docs files")
|
||||
logger.info(
|
||||
f" - Docs stream: README, {len(github_streams.docs_stream.docs_files)} docs files"
|
||||
)
|
||||
if github_streams.insights_stream:
|
||||
problems = len(github_streams.insights_stream.common_problems)
|
||||
solutions = len(github_streams.insights_stream.known_solutions)
|
||||
@@ -766,7 +784,11 @@ if __name__ == "__main__":
|
||||
parser.add_argument("github_data", help="Path to GitHub data JSON")
|
||||
parser.add_argument("--output", "-o", default="merged_data.json", help="Output file path")
|
||||
parser.add_argument(
|
||||
"--mode", "-m", choices=["rule-based", "claude-enhanced"], default="rule-based", help="Merge mode"
|
||||
"--mode",
|
||||
"-m",
|
||||
choices=["rule-based", "claude-enhanced"],
|
||||
default="rule-based",
|
||||
help="Merge mode",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -17,12 +17,22 @@ from pathlib import Path
|
||||
# Import utilities
|
||||
try:
|
||||
from quality_checker import SkillQualityChecker, print_report
|
||||
from utils import format_file_size, open_folder, print_upload_instructions, validate_skill_directory
|
||||
from utils import (
|
||||
format_file_size,
|
||||
open_folder,
|
||||
print_upload_instructions,
|
||||
validate_skill_directory,
|
||||
)
|
||||
except ImportError:
|
||||
# If running from different directory, add cli to path
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from quality_checker import SkillQualityChecker, print_report
|
||||
from utils import format_file_size, open_folder, print_upload_instructions, validate_skill_directory
|
||||
from utils import (
|
||||
format_file_size,
|
||||
open_folder,
|
||||
print_upload_instructions,
|
||||
validate_skill_directory,
|
||||
)
|
||||
|
||||
|
||||
def package_skill(skill_dir, open_folder_after=True, skip_quality_check=False, target="claude"):
|
||||
@@ -135,9 +145,13 @@ Examples:
|
||||
|
||||
parser.add_argument("skill_dir", help="Path to skill directory (e.g., output/react/)")
|
||||
|
||||
parser.add_argument("--no-open", action="store_true", help="Do not open the output folder after packaging")
|
||||
parser.add_argument(
|
||||
"--no-open", action="store_true", help="Do not open the output folder after packaging"
|
||||
)
|
||||
|
||||
parser.add_argument("--skip-quality-check", action="store_true", help="Skip quality checks before packaging")
|
||||
parser.add_argument(
|
||||
"--skip-quality-check", action="store_true", help="Skip quality checks before packaging"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--target",
|
||||
@@ -147,7 +161,9 @@ Examples:
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--upload", action="store_true", help="Automatically upload after packaging (requires platform API key)"
|
||||
"--upload",
|
||||
action="store_true",
|
||||
help="Automatically upload after packaging (requires platform API key)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -135,7 +135,9 @@ class BasePatternDetector:
|
||||
# Default: no deep detection
|
||||
return None
|
||||
|
||||
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None:
|
||||
def detect_full(
|
||||
self, class_sig, all_classes: list, file_content: str
|
||||
) -> PatternInstance | None:
|
||||
"""
|
||||
Full detection using behavioral analysis.
|
||||
|
||||
@@ -150,7 +152,9 @@ class BasePatternDetector:
|
||||
# Default: no full detection
|
||||
return None
|
||||
|
||||
def detect(self, class_sig, all_classes: list, file_content: str | None = None) -> PatternInstance | None:
|
||||
def detect(
|
||||
self, class_sig, all_classes: list, file_content: str | None = None
|
||||
) -> PatternInstance | None:
|
||||
"""
|
||||
Detect pattern based on configured depth.
|
||||
|
||||
@@ -273,7 +277,9 @@ class PatternRecognizer:
|
||||
for class_sig in class_sigs:
|
||||
for detector in self.detectors:
|
||||
pattern = detector.detect(
|
||||
class_sig=class_sig, all_classes=class_sigs, file_content=content if self.depth == "full" else None
|
||||
class_sig=class_sig,
|
||||
all_classes=class_sigs,
|
||||
file_content=content if self.depth == "full" else None,
|
||||
)
|
||||
|
||||
if pattern:
|
||||
@@ -327,7 +333,9 @@ class PatternRecognizer:
|
||||
params = []
|
||||
for param in method.get("parameters", []):
|
||||
param_obj = SimpleNamespace(
|
||||
name=param.get("name", ""), type_hint=param.get("type_hint"), default=param.get("default")
|
||||
name=param.get("name", ""),
|
||||
type_hint=param.get("type_hint"),
|
||||
default=param.get("default"),
|
||||
)
|
||||
params.append(param_obj)
|
||||
|
||||
@@ -397,7 +405,14 @@ class SingletonDetector(BasePatternDetector):
|
||||
confidence = 0.0
|
||||
|
||||
# Check for instance method (getInstance, instance, get_instance, etc.)
|
||||
instance_methods = ["getInstance", "instance", "get_instance", "Instance", "GetInstance", "INSTANCE"]
|
||||
instance_methods = [
|
||||
"getInstance",
|
||||
"instance",
|
||||
"get_instance",
|
||||
"Instance",
|
||||
"GetInstance",
|
||||
"INSTANCE",
|
||||
]
|
||||
|
||||
has_instance_method = False
|
||||
for method in class_sig.methods:
|
||||
@@ -438,7 +453,9 @@ class SingletonDetector(BasePatternDetector):
|
||||
# Fallback to surface detection
|
||||
return self.detect_surface(class_sig, all_classes)
|
||||
|
||||
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None:
|
||||
def detect_full(
|
||||
self, class_sig, all_classes: list, file_content: str
|
||||
) -> PatternInstance | None:
|
||||
"""
|
||||
Full behavioral analysis for Singleton.
|
||||
|
||||
@@ -767,7 +784,9 @@ class StrategyDetector(BasePatternDetector):
|
||||
siblings = [
|
||||
cls.name
|
||||
for cls in all_classes
|
||||
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name
|
||||
if cls.base_classes
|
||||
and base_class in cls.base_classes
|
||||
and cls.name != class_sig.name
|
||||
]
|
||||
|
||||
if siblings:
|
||||
@@ -885,7 +904,9 @@ class DecoratorDetector(BasePatternDetector):
|
||||
siblings = [
|
||||
cls.name
|
||||
for cls in all_classes
|
||||
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name
|
||||
if cls.base_classes
|
||||
and base_class in cls.base_classes
|
||||
and cls.name != class_sig.name
|
||||
]
|
||||
|
||||
if siblings:
|
||||
@@ -898,7 +919,10 @@ class DecoratorDetector(BasePatternDetector):
|
||||
# Check if takes object parameter (not just self)
|
||||
if len(init_method.parameters) > 1: # More than just 'self'
|
||||
param_names = [p.name for p in init_method.parameters if p.name != "self"]
|
||||
if any(name in ["wrapped", "component", "inner", "obj", "target"] for name in param_names):
|
||||
if any(
|
||||
name in ["wrapped", "component", "inner", "obj", "target"]
|
||||
for name in param_names
|
||||
):
|
||||
evidence.append(f"Takes wrapped object in constructor: {param_names}")
|
||||
confidence += 0.4
|
||||
|
||||
@@ -969,7 +993,8 @@ class BuilderDetector(BasePatternDetector):
|
||||
# Check for build/create terminal method
|
||||
terminal_methods = ["build", "create", "execute", "construct", "make"]
|
||||
has_terminal = any(
|
||||
m.name.lower() in terminal_methods or m.name.lower().startswith("build") for m in class_sig.methods
|
||||
m.name.lower() in terminal_methods or m.name.lower().startswith("build")
|
||||
for m in class_sig.methods
|
||||
)
|
||||
|
||||
if has_terminal:
|
||||
@@ -979,7 +1004,9 @@ class BuilderDetector(BasePatternDetector):
|
||||
# Check for setter methods (with_, set_, add_)
|
||||
setter_prefixes = ["with", "set", "add", "configure"]
|
||||
setter_count = sum(
|
||||
1 for m in class_sig.methods if any(m.name.lower().startswith(prefix) for prefix in setter_prefixes)
|
||||
1
|
||||
for m in class_sig.methods
|
||||
if any(m.name.lower().startswith(prefix) for prefix in setter_prefixes)
|
||||
)
|
||||
|
||||
if setter_count >= 3:
|
||||
@@ -1006,7 +1033,9 @@ class BuilderDetector(BasePatternDetector):
|
||||
# Fallback to surface
|
||||
return self.detect_surface(class_sig, all_classes)
|
||||
|
||||
def detect_full(self, class_sig, all_classes: list, file_content: str) -> PatternInstance | None:
|
||||
def detect_full(
|
||||
self, class_sig, all_classes: list, file_content: str
|
||||
) -> PatternInstance | None:
|
||||
"""Full behavioral analysis for Builder"""
|
||||
# Start with deep detection
|
||||
pattern = self.detect_deep(class_sig, all_classes)
|
||||
@@ -1186,7 +1215,9 @@ class CommandDetector(BasePatternDetector):
|
||||
has_execute = any(m.name.lower() in execute_methods for m in class_sig.methods)
|
||||
|
||||
if has_execute:
|
||||
method_name = next(m.name for m in class_sig.methods if m.name.lower() in execute_methods)
|
||||
method_name = next(
|
||||
m.name for m in class_sig.methods if m.name.lower() in execute_methods
|
||||
)
|
||||
evidence.append(f"Has execute method: {method_name}()")
|
||||
confidence += 0.5
|
||||
|
||||
@@ -1299,7 +1330,9 @@ class TemplateMethodDetector(BasePatternDetector):
|
||||
]
|
||||
|
||||
hook_methods = [
|
||||
m.name for m in class_sig.methods if any(keyword in m.name.lower() for keyword in hook_keywords)
|
||||
m.name
|
||||
for m in class_sig.methods
|
||||
if any(keyword in m.name.lower() for keyword in hook_keywords)
|
||||
]
|
||||
|
||||
if len(hook_methods) >= 2:
|
||||
@@ -1307,7 +1340,11 @@ class TemplateMethodDetector(BasePatternDetector):
|
||||
confidence += 0.3
|
||||
|
||||
# Check for abstract methods (no implementation or pass/raise)
|
||||
abstract_methods = [m.name for m in class_sig.methods if m.name.startswith("_") or "abstract" in m.name.lower()]
|
||||
abstract_methods = [
|
||||
m.name
|
||||
for m in class_sig.methods
|
||||
if m.name.startswith("_") or "abstract" in m.name.lower()
|
||||
]
|
||||
|
||||
if abstract_methods:
|
||||
evidence.append(f"Has abstract methods: {', '.join(abstract_methods[:2])}")
|
||||
@@ -1383,7 +1420,8 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
|
||||
# Check for handle/process method
|
||||
handle_methods = ["handle", "process", "execute", "filter", "middleware"]
|
||||
has_handle = any(
|
||||
m.name.lower() in handle_methods or m.name.lower().startswith("handle") for m in class_sig.methods
|
||||
m.name.lower() in handle_methods or m.name.lower().startswith("handle")
|
||||
for m in class_sig.methods
|
||||
)
|
||||
|
||||
if has_handle:
|
||||
@@ -1405,7 +1443,8 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
|
||||
|
||||
# Check for set_next() method
|
||||
has_set_next = any(
|
||||
"next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower()) for m in class_sig.methods
|
||||
"next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower())
|
||||
for m in class_sig.methods
|
||||
)
|
||||
|
||||
if has_set_next:
|
||||
@@ -1419,7 +1458,9 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
|
||||
siblings = [
|
||||
cls.name
|
||||
for cls in all_classes
|
||||
if cls.base_classes and base_class in cls.base_classes and cls.name != class_sig.name
|
||||
if cls.base_classes
|
||||
and base_class in cls.base_classes
|
||||
and cls.name != class_sig.name
|
||||
]
|
||||
|
||||
if siblings and has_next_ref:
|
||||
@@ -1625,16 +1666,22 @@ Supported Languages:
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument("--file", action="append", help="Source file to analyze (can be specified multiple times)")
|
||||
parser.add_argument(
|
||||
"--file", action="append", help="Source file to analyze (can be specified multiple times)"
|
||||
)
|
||||
parser.add_argument("--directory", help="Directory to analyze (analyzes all source files)")
|
||||
parser.add_argument("--output", help="Output directory for results (default: current directory)")
|
||||
parser.add_argument(
|
||||
"--output", help="Output directory for results (default: current directory)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--depth",
|
||||
choices=["surface", "deep", "full"],
|
||||
default="deep",
|
||||
help="Detection depth: surface (fast), deep (default), full (thorough)",
|
||||
)
|
||||
parser.add_argument("--json", action="store_true", help="Output JSON format instead of human-readable")
|
||||
parser.add_argument(
|
||||
"--json", action="store_true", help="Output JSON format instead of human-readable"
|
||||
)
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -1697,7 +1744,9 @@ Supported Languages:
|
||||
if not args.json and args.verbose:
|
||||
print(f"\n{file_path}:")
|
||||
for pattern in report.patterns:
|
||||
print(f" [{pattern.pattern_type}] {pattern.class_name} (confidence: {pattern.confidence:.2f})")
|
||||
print(
|
||||
f" [{pattern.pattern_type}] {pattern.class_name} (confidence: {pattern.confidence:.2f})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
if args.verbose:
|
||||
@@ -1737,11 +1786,15 @@ Supported Languages:
|
||||
pattern_counts = {}
|
||||
for report in all_reports:
|
||||
for pattern in report.patterns:
|
||||
pattern_counts[pattern.pattern_type] = pattern_counts.get(pattern.pattern_type, 0) + 1
|
||||
pattern_counts[pattern.pattern_type] = (
|
||||
pattern_counts.get(pattern.pattern_type, 0) + 1
|
||||
)
|
||||
|
||||
if pattern_counts:
|
||||
print("Pattern Summary:")
|
||||
for pattern_type, count in sorted(pattern_counts.items(), key=lambda x: x[1], reverse=True):
|
||||
for pattern_type, count in sorted(
|
||||
pattern_counts.items(), key=lambda x: x[1], reverse=True
|
||||
):
|
||||
print(f" {pattern_type}: {count}")
|
||||
print()
|
||||
|
||||
|
||||
@@ -196,7 +196,9 @@ class PDFExtractor:
|
||||
"col_count": len(tab.extract()[0]) if tab.extract() else 0,
|
||||
}
|
||||
tables.append(table_data)
|
||||
self.log(f" Found table {idx}: {table_data['row_count']}x{table_data['col_count']}")
|
||||
self.log(
|
||||
f" Found table {idx}: {table_data['row_count']}x{table_data['col_count']}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.log(f" Table extraction failed: {e}")
|
||||
@@ -294,7 +296,9 @@ class PDFExtractor:
|
||||
issues.append("May be natural language, not code")
|
||||
|
||||
# Check code/comment ratio
|
||||
comment_lines = sum(1 for line in code.split("\n") if line.strip().startswith(("#", "//", "/*", "*", "--")))
|
||||
comment_lines = sum(
|
||||
1 for line in code.split("\n") if line.strip().startswith(("#", "//", "/*", "*", "--"))
|
||||
)
|
||||
total_lines = len([l for l in code.split("\n") if l.strip()])
|
||||
if total_lines > 0 and comment_lines / total_lines > 0.7:
|
||||
issues.append("Mostly comments")
|
||||
@@ -501,11 +505,17 @@ class PDFExtractor:
|
||||
# Common code patterns that span multiple lines
|
||||
patterns = [
|
||||
# Function definitions
|
||||
(r"((?:def|function|func|fn|public|private)\s+\w+\s*\([^)]*\)\s*[{:]?[^}]*[}]?)", "function"),
|
||||
(
|
||||
r"((?:def|function|func|fn|public|private)\s+\w+\s*\([^)]*\)\s*[{:]?[^}]*[}]?)",
|
||||
"function",
|
||||
),
|
||||
# Class definitions
|
||||
(r"(class\s+\w+[^{]*\{[^}]*\})", "class"),
|
||||
# Import statements block
|
||||
(r"((?:import|require|use|include)[^\n]+(?:\n(?:import|require|use|include)[^\n]+)*)", "imports"),
|
||||
(
|
||||
r"((?:import|require|use|include)[^\n]+(?:\n(?:import|require|use|include)[^\n]+)*)",
|
||||
"imports",
|
||||
),
|
||||
]
|
||||
|
||||
for pattern, block_type in patterns:
|
||||
@@ -628,7 +638,15 @@ class PDFExtractor:
|
||||
"""
|
||||
if self.chunk_size == 0:
|
||||
# No chunking - return all pages as one chunk
|
||||
return [{"chunk_number": 1, "start_page": 1, "end_page": len(pages), "pages": pages, "chapter_title": None}]
|
||||
return [
|
||||
{
|
||||
"chunk_number": 1,
|
||||
"start_page": 1,
|
||||
"end_page": len(pages),
|
||||
"pages": pages,
|
||||
"chapter_title": None,
|
||||
}
|
||||
]
|
||||
|
||||
chunks = []
|
||||
current_chunk = []
|
||||
@@ -812,7 +830,9 @@ class PDFExtractor:
|
||||
code_samples = [c for c in code_samples if c["quality_score"] >= self.min_quality]
|
||||
filtered_count = code_samples_before - len(code_samples)
|
||||
if filtered_count > 0:
|
||||
self.log(f" Filtered out {filtered_count} low-quality code blocks (min_quality={self.min_quality})")
|
||||
self.log(
|
||||
f" Filtered out {filtered_count} low-quality code blocks (min_quality={self.min_quality})"
|
||||
)
|
||||
|
||||
# Sort by quality score (highest first)
|
||||
code_samples.sort(key=lambda x: x["quality_score"], reverse=True)
|
||||
@@ -891,7 +911,9 @@ class PDFExtractor:
|
||||
|
||||
# Show feature status
|
||||
if self.use_ocr:
|
||||
status = "✅ enabled" if TESSERACT_AVAILABLE else "⚠️ not available (install pytesseract)"
|
||||
status = (
|
||||
"✅ enabled" if TESSERACT_AVAILABLE else "⚠️ not available (install pytesseract)"
|
||||
)
|
||||
print(f" OCR: {status}")
|
||||
if self.extract_tables:
|
||||
print(" Table extraction: ✅ enabled")
|
||||
@@ -905,7 +927,9 @@ class PDFExtractor:
|
||||
|
||||
# Extract each page (with parallel processing - Priority 3)
|
||||
if self.parallel and CONCURRENT_AVAILABLE and len(self.doc) > 5:
|
||||
print(f"🚀 Extracting {len(self.doc)} pages in parallel ({self.max_workers} workers)...")
|
||||
print(
|
||||
f"🚀 Extracting {len(self.doc)} pages in parallel ({self.max_workers} workers)..."
|
||||
)
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||
page_numbers = list(range(len(self.doc)))
|
||||
self.pages = list(executor.map(self.extract_page, page_numbers))
|
||||
@@ -962,7 +986,11 @@ class PDFExtractor:
|
||||
for chunk in chunks:
|
||||
if chunk["chapter_title"]:
|
||||
chapters.append(
|
||||
{"title": chunk["chapter_title"], "start_page": chunk["start_page"], "end_page": chunk["end_page"]}
|
||||
{
|
||||
"title": chunk["chapter_title"],
|
||||
"start_page": chunk["start_page"],
|
||||
"end_page": chunk["end_page"],
|
||||
}
|
||||
)
|
||||
|
||||
result = {
|
||||
@@ -1042,12 +1070,21 @@ Examples:
|
||||
parser.add_argument("-o", "--output", help="Output JSON file path (default: print to stdout)")
|
||||
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
|
||||
parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
|
||||
parser.add_argument("--chunk-size", type=int, default=10, help="Pages per chunk (0 = no chunking, default: 10)")
|
||||
parser.add_argument("--no-merge", action="store_true", help="Disable merging code blocks across pages")
|
||||
parser.add_argument(
|
||||
"--min-quality", type=float, default=0.0, help="Minimum code quality score (0-10, default: 0 = no filtering)"
|
||||
"--chunk-size", type=int, default=10, help="Pages per chunk (0 = no chunking, default: 10)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-merge", action="store_true", help="Disable merging code blocks across pages"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--min-quality",
|
||||
type=float,
|
||||
default=0.0,
|
||||
help="Minimum code quality score (0-10, default: 0 = no filtering)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--extract-images", action="store_true", help="Extract images to files (NEW in B1.5)"
|
||||
)
|
||||
parser.add_argument("--extract-images", action="store_true", help="Extract images to files (NEW in B1.5)")
|
||||
parser.add_argument(
|
||||
"--image-dir",
|
||||
type=str,
|
||||
@@ -1062,12 +1099,22 @@ Examples:
|
||||
)
|
||||
|
||||
# Advanced features (Priority 2 & 3)
|
||||
parser.add_argument("--ocr", action="store_true", help="Use OCR for scanned PDFs (requires pytesseract)")
|
||||
parser.add_argument(
|
||||
"--ocr", action="store_true", help="Use OCR for scanned PDFs (requires pytesseract)"
|
||||
)
|
||||
parser.add_argument("--password", type=str, default=None, help="Password for encrypted PDF")
|
||||
parser.add_argument("--extract-tables", action="store_true", help="Extract tables from PDF (Priority 2)")
|
||||
parser.add_argument("--parallel", action="store_true", help="Process pages in parallel (Priority 3)")
|
||||
parser.add_argument("--workers", type=int, default=None, help="Number of parallel workers (default: CPU count)")
|
||||
parser.add_argument("--no-cache", action="store_true", help="Disable caching of expensive operations")
|
||||
parser.add_argument(
|
||||
"--extract-tables", action="store_true", help="Extract tables from PDF (Priority 2)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--parallel", action="store_true", help="Process pages in parallel (Priority 3)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--workers", type=int, default=None, help="Number of parallel workers (default: CPU count)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-cache", action="store_true", help="Disable caching of expensive operations"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
@@ -54,7 +54,11 @@ def infer_description_from_pdf(pdf_metadata: dict = None, name: str = "") -> str
|
||||
return f"Use when working with {title.lower()}"
|
||||
|
||||
# Improved fallback
|
||||
return f"Use when referencing {name} documentation" if name else "Use when referencing this documentation"
|
||||
return (
|
||||
f"Use when referencing {name} documentation"
|
||||
if name
|
||||
else "Use when referencing this documentation"
|
||||
)
|
||||
|
||||
|
||||
class PDFToSkillConverter:
|
||||
@@ -65,7 +69,9 @@ class PDFToSkillConverter:
|
||||
self.name = config["name"]
|
||||
self.pdf_path = config.get("pdf_path", "")
|
||||
# Set initial description (will be improved after extraction if metadata available)
|
||||
self.description = config.get("description", f"Use when referencing {self.name} documentation")
|
||||
self.description = config.get(
|
||||
"description", f"Use when referencing {self.name} documentation"
|
||||
)
|
||||
|
||||
# Paths
|
||||
self.skill_dir = f"output/{self.name}"
|
||||
@@ -151,7 +157,10 @@ class PDFToSkillConverter:
|
||||
if isinstance(first_value, list) and first_value and isinstance(first_value[0], dict):
|
||||
# Already categorized - convert to expected format
|
||||
for cat_key, pages in self.categories.items():
|
||||
categorized[cat_key] = {"title": cat_key.replace("_", " ").title(), "pages": pages}
|
||||
categorized[cat_key] = {
|
||||
"title": cat_key.replace("_", " ").title(),
|
||||
"pages": pages,
|
||||
}
|
||||
else:
|
||||
# Keyword-based categorization
|
||||
# Initialize categories
|
||||
@@ -171,7 +180,8 @@ class PDFToSkillConverter:
|
||||
score = sum(
|
||||
1
|
||||
for kw in keywords
|
||||
if isinstance(kw, str) and (kw.lower() in text or kw.lower() in headings_text)
|
||||
if isinstance(kw, str)
|
||||
and (kw.lower() in text or kw.lower() in headings_text)
|
||||
)
|
||||
else:
|
||||
score = 0
|
||||
@@ -490,7 +500,13 @@ class PDFToSkillConverter:
|
||||
for keyword in pattern_keywords:
|
||||
if keyword in heading_text:
|
||||
page_num = page.get("page_number", 0)
|
||||
patterns.append({"type": keyword.title(), "heading": heading.get("text", ""), "page": page_num})
|
||||
patterns.append(
|
||||
{
|
||||
"type": keyword.title(),
|
||||
"heading": heading.get("text", ""),
|
||||
"page": page_num,
|
||||
}
|
||||
)
|
||||
break # Only add once per heading
|
||||
|
||||
if not patterns:
|
||||
@@ -526,7 +542,8 @@ class PDFToSkillConverter:
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Convert PDF documentation to Claude skill", formatter_class=argparse.RawDescriptionHelpFormatter
|
||||
description="Convert PDF documentation to Claude skill",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
)
|
||||
|
||||
parser.add_argument("--config", help="PDF config JSON file")
|
||||
@@ -548,7 +565,10 @@ def main():
|
||||
elif args.from_json:
|
||||
# Build from extracted JSON
|
||||
name = Path(args.from_json).stem.replace("_extracted", "")
|
||||
config = {"name": name, "description": args.description or f"Use when referencing {name} documentation"}
|
||||
config = {
|
||||
"name": name,
|
||||
"description": args.description or f"Use when referencing {name} documentation",
|
||||
}
|
||||
converter = PDFToSkillConverter(config)
|
||||
converter.load_extracted_data(args.from_json)
|
||||
converter.build_skill()
|
||||
@@ -561,7 +581,12 @@ def main():
|
||||
"name": args.name,
|
||||
"pdf_path": args.pdf,
|
||||
"description": args.description or f"Use when referencing {args.name} documentation",
|
||||
"extract_options": {"chunk_size": 10, "min_quality": 5.0, "extract_images": True, "min_image_size": 100},
|
||||
"extract_options": {
|
||||
"chunk_size": 10,
|
||||
"min_quality": 5.0,
|
||||
"extract_images": True,
|
||||
"min_image_size": 100,
|
||||
},
|
||||
}
|
||||
|
||||
# Create converter
|
||||
|
||||
@@ -138,7 +138,9 @@ class SkillQualityChecker:
|
||||
# Check references directory exists
|
||||
if not self.references_dir.exists():
|
||||
self.report.add_warning(
|
||||
"structure", "references/ directory not found - skill may be incomplete", str(self.references_dir)
|
||||
"structure",
|
||||
"references/ directory not found - skill may be incomplete",
|
||||
str(self.references_dir),
|
||||
)
|
||||
elif not list(self.references_dir.rglob("*.md")):
|
||||
self.report.add_warning(
|
||||
@@ -197,7 +199,9 @@ class SkillQualityChecker:
|
||||
|
||||
if sections < 4:
|
||||
self.report.add_warning(
|
||||
"enhancement", f"Only {sections} sections found - SKILL.md may be too basic", "SKILL.md"
|
||||
"enhancement",
|
||||
f"Only {sections} sections found - SKILL.md may be too basic",
|
||||
"SKILL.md",
|
||||
)
|
||||
else:
|
||||
self.report.add_info("enhancement", f"✓ Found {sections} sections", "SKILL.md")
|
||||
@@ -211,7 +215,9 @@ class SkillQualityChecker:
|
||||
|
||||
# Check YAML frontmatter
|
||||
if not content.startswith("---"):
|
||||
self.report.add_error("content", "Missing YAML frontmatter - SKILL.md must start with ---", "SKILL.md", 1)
|
||||
self.report.add_error(
|
||||
"content", "Missing YAML frontmatter - SKILL.md must start with ---", "SKILL.md", 1
|
||||
)
|
||||
else:
|
||||
# Extract frontmatter
|
||||
try:
|
||||
@@ -221,26 +227,38 @@ class SkillQualityChecker:
|
||||
|
||||
# Check for required fields
|
||||
if "name:" not in frontmatter:
|
||||
self.report.add_error("content", 'Missing "name:" field in YAML frontmatter', "SKILL.md", 2)
|
||||
self.report.add_error(
|
||||
"content", 'Missing "name:" field in YAML frontmatter', "SKILL.md", 2
|
||||
)
|
||||
|
||||
# Check for description
|
||||
if "description:" in frontmatter:
|
||||
self.report.add_info("content", "✓ YAML frontmatter includes description", "SKILL.md")
|
||||
self.report.add_info(
|
||||
"content", "✓ YAML frontmatter includes description", "SKILL.md"
|
||||
)
|
||||
else:
|
||||
self.report.add_error("content", "Invalid YAML frontmatter format", "SKILL.md", 1)
|
||||
self.report.add_error(
|
||||
"content", "Invalid YAML frontmatter format", "SKILL.md", 1
|
||||
)
|
||||
except Exception as e:
|
||||
self.report.add_error("content", f"Error parsing YAML frontmatter: {e}", "SKILL.md", 1)
|
||||
self.report.add_error(
|
||||
"content", f"Error parsing YAML frontmatter: {e}", "SKILL.md", 1
|
||||
)
|
||||
|
||||
# Check code block language tags
|
||||
code_blocks_without_lang = re.findall(r"```\n[^`]", content)
|
||||
if code_blocks_without_lang:
|
||||
self.report.add_warning(
|
||||
"content", f"Found {len(code_blocks_without_lang)} code blocks without language tags", "SKILL.md"
|
||||
"content",
|
||||
f"Found {len(code_blocks_without_lang)} code blocks without language tags",
|
||||
"SKILL.md",
|
||||
)
|
||||
|
||||
# Check for "When to Use" section
|
||||
if "when to use" not in content.lower():
|
||||
self.report.add_warning("content", 'Missing "When to Use This Skill" section', "SKILL.md")
|
||||
self.report.add_warning(
|
||||
"content", 'Missing "When to Use This Skill" section', "SKILL.md"
|
||||
)
|
||||
else:
|
||||
self.report.add_info("content", '✓ Found "When to Use" section', "SKILL.md")
|
||||
|
||||
@@ -248,7 +266,9 @@ class SkillQualityChecker:
|
||||
if self.references_dir.exists():
|
||||
ref_files = list(self.references_dir.rglob("*.md"))
|
||||
if ref_files:
|
||||
self.report.add_info("content", f"✓ Found {len(ref_files)} reference files", "references/")
|
||||
self.report.add_info(
|
||||
"content", f"✓ Found {len(ref_files)} reference files", "references/"
|
||||
)
|
||||
|
||||
# Check if references are mentioned in SKILL.md
|
||||
mentioned_refs = 0
|
||||
@@ -258,7 +278,9 @@ class SkillQualityChecker:
|
||||
|
||||
if mentioned_refs == 0:
|
||||
self.report.add_warning(
|
||||
"content", "Reference files exist but none are mentioned in SKILL.md", "SKILL.md"
|
||||
"content",
|
||||
"Reference files exist but none are mentioned in SKILL.md",
|
||||
"SKILL.md",
|
||||
)
|
||||
|
||||
def _check_links(self):
|
||||
@@ -295,7 +317,9 @@ class SkillQualityChecker:
|
||||
if links:
|
||||
internal_links = [l for t, l in links if not l.startswith("http")]
|
||||
if internal_links:
|
||||
self.report.add_info("links", f"✓ All {len(internal_links)} internal links are valid", "SKILL.md")
|
||||
self.report.add_info(
|
||||
"links", f"✓ All {len(internal_links)} internal links are valid", "SKILL.md"
|
||||
)
|
||||
|
||||
def _check_skill_completeness(self):
|
||||
"""Check skill completeness based on best practices.
|
||||
@@ -316,9 +340,13 @@ class SkillQualityChecker:
|
||||
r"requirements?:",
|
||||
r"make\s+sure\s+you\s+have",
|
||||
]
|
||||
has_grounding = any(re.search(pattern, content, re.IGNORECASE) for pattern in grounding_patterns)
|
||||
has_grounding = any(
|
||||
re.search(pattern, content, re.IGNORECASE) for pattern in grounding_patterns
|
||||
)
|
||||
if has_grounding:
|
||||
self.report.add_info("completeness", "✓ Found verification/prerequisites section", "SKILL.md")
|
||||
self.report.add_info(
|
||||
"completeness", "✓ Found verification/prerequisites section", "SKILL.md"
|
||||
)
|
||||
else:
|
||||
self.report.add_info(
|
||||
"completeness",
|
||||
@@ -334,12 +362,18 @@ class SkillQualityChecker:
|
||||
r"error\s+handling",
|
||||
r"when\s+things\s+go\s+wrong",
|
||||
]
|
||||
has_error_handling = any(re.search(pattern, content, re.IGNORECASE) for pattern in error_patterns)
|
||||
has_error_handling = any(
|
||||
re.search(pattern, content, re.IGNORECASE) for pattern in error_patterns
|
||||
)
|
||||
if has_error_handling:
|
||||
self.report.add_info("completeness", "✓ Found error handling/troubleshooting guidance", "SKILL.md")
|
||||
self.report.add_info(
|
||||
"completeness", "✓ Found error handling/troubleshooting guidance", "SKILL.md"
|
||||
)
|
||||
else:
|
||||
self.report.add_info(
|
||||
"completeness", "Consider adding troubleshooting section for common issues", "SKILL.md"
|
||||
"completeness",
|
||||
"Consider adding troubleshooting section for common issues",
|
||||
"SKILL.md",
|
||||
)
|
||||
|
||||
# Check for workflow steps (numbered or sequential indicators)
|
||||
@@ -351,10 +385,14 @@ class SkillQualityChecker:
|
||||
r"finally,?\s+",
|
||||
r"next,?\s+",
|
||||
]
|
||||
steps_found = sum(1 for pattern in step_patterns if re.search(pattern, content, re.IGNORECASE))
|
||||
steps_found = sum(
|
||||
1 for pattern in step_patterns if re.search(pattern, content, re.IGNORECASE)
|
||||
)
|
||||
if steps_found >= 3:
|
||||
self.report.add_info(
|
||||
"completeness", f"✓ Found clear workflow indicators ({steps_found} step markers)", "SKILL.md"
|
||||
"completeness",
|
||||
f"✓ Found clear workflow indicators ({steps_found} step markers)",
|
||||
"SKILL.md",
|
||||
)
|
||||
elif steps_found > 0:
|
||||
self.report.add_info(
|
||||
@@ -451,7 +489,9 @@ Examples:
|
||||
|
||||
parser.add_argument("--verbose", "-v", action="store_true", help="Show all info messages")
|
||||
|
||||
parser.add_argument("--strict", action="store_true", help="Exit with error code if any warnings or errors found")
|
||||
parser.add_argument(
|
||||
"--strict", action="store_true", help="Exit with error code if any warnings or errors found"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
@@ -179,7 +179,12 @@ class RateLimitHandler:
|
||||
|
||||
reset_time = datetime.fromtimestamp(reset_timestamp) if reset_timestamp else None
|
||||
|
||||
return {"limit": limit, "remaining": remaining, "reset_timestamp": reset_timestamp, "reset_time": reset_time}
|
||||
return {
|
||||
"limit": limit,
|
||||
"remaining": remaining,
|
||||
"reset_timestamp": reset_timestamp,
|
||||
"reset_time": reset_time,
|
||||
}
|
||||
|
||||
def get_rate_limit_info(self) -> dict[str, Any]:
|
||||
"""
|
||||
|
||||
@@ -136,7 +136,9 @@ def print_summary(result):
|
||||
|
||||
# Category breakdown
|
||||
if hasattr(result, "test_results"):
|
||||
print(f"\n{ColoredTextTestResult.BOLD}Test Breakdown by Category:{ColoredTextTestResult.RESET}")
|
||||
print(
|
||||
f"\n{ColoredTextTestResult.BOLD}Test Breakdown by Category:{ColoredTextTestResult.RESET}"
|
||||
)
|
||||
|
||||
categories = {}
|
||||
for status, test in result.test_results:
|
||||
@@ -164,11 +166,16 @@ def main():
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Run tests for Skill Seeker", formatter_class=argparse.RawDescriptionHelpFormatter
|
||||
description="Run tests for Skill Seeker",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
)
|
||||
|
||||
parser.add_argument("--suite", "-s", type=str, help="Run specific test suite (config, features, integration)")
|
||||
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output (show each test)")
|
||||
parser.add_argument(
|
||||
"--suite", "-s", type=str, help="Run specific test suite (config, features, integration)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose", "-v", action="store_true", help="Verbose output (show each test)"
|
||||
)
|
||||
parser.add_argument("--quiet", "-q", action="store_true", help="Quiet output (minimal output)")
|
||||
parser.add_argument("--failfast", "-f", action="store_true", help="Stop on first failure")
|
||||
parser.add_argument("--list", "-l", action="store_true", help="List all available tests")
|
||||
@@ -188,7 +195,9 @@ def main():
|
||||
|
||||
# Discover or load specific suite
|
||||
if args.suite:
|
||||
print(f"Running test suite: {ColoredTextTestResult.BLUE}{args.suite}{ColoredTextTestResult.RESET}\n")
|
||||
print(
|
||||
f"Running test suite: {ColoredTextTestResult.BLUE}{args.suite}{ColoredTextTestResult.RESET}\n"
|
||||
)
|
||||
suite = run_specific_suite(args.suite)
|
||||
if suite is None:
|
||||
return 1
|
||||
|
||||
@@ -50,7 +50,9 @@ class ConfigSplitter:
|
||||
print("ℹ️ Single source unified config - no splitting needed")
|
||||
return "none"
|
||||
else:
|
||||
print(f"ℹ️ Multi-source unified config ({num_sources} sources) - source split recommended")
|
||||
print(
|
||||
f"ℹ️ Multi-source unified config ({num_sources} sources) - source split recommended"
|
||||
)
|
||||
return "source"
|
||||
# For unified configs, only 'source' and 'none' strategies are valid
|
||||
elif self.strategy in ["source", "none"]:
|
||||
@@ -77,7 +79,9 @@ class ConfigSplitter:
|
||||
print(f"ℹ️ Medium documentation ({max_pages} pages) - category split recommended")
|
||||
return "category"
|
||||
elif "categories" in self.config and len(self.config["categories"]) >= 3:
|
||||
print(f"ℹ️ Large documentation ({max_pages} pages) - router + categories recommended")
|
||||
print(
|
||||
f"ℹ️ Large documentation ({max_pages} pages) - router + categories recommended"
|
||||
)
|
||||
return "router"
|
||||
else:
|
||||
print(f"ℹ️ Large documentation ({max_pages} pages) - size-based split")
|
||||
@@ -227,7 +231,9 @@ class ConfigSplitter:
|
||||
"max_pages": 500, # Router only needs overview pages
|
||||
"_router": True,
|
||||
"_sub_skills": [cfg["name"] for cfg in sub_configs],
|
||||
"_routing_keywords": {cfg["name"]: list(cfg.get("categories", {}).keys()) for cfg in sub_configs},
|
||||
"_routing_keywords": {
|
||||
cfg["name"]: list(cfg.get("categories", {}).keys()) for cfg in sub_configs
|
||||
},
|
||||
}
|
||||
|
||||
return router_config
|
||||
@@ -333,11 +339,17 @@ Config Types:
|
||||
help="Splitting strategy (default: auto)",
|
||||
)
|
||||
|
||||
parser.add_argument("--target-pages", type=int, default=5000, help="Target pages per skill (default: 5000)")
|
||||
parser.add_argument(
|
||||
"--target-pages", type=int, default=5000, help="Target pages per skill (default: 5000)"
|
||||
)
|
||||
|
||||
parser.add_argument("--output-dir", help="Output directory for configs (default: same as input)")
|
||||
parser.add_argument(
|
||||
"--output-dir", help="Output directory for configs (default: same as input)"
|
||||
)
|
||||
|
||||
parser.add_argument("--dry-run", action="store_true", help="Show what would be created without saving files")
|
||||
parser.add_argument(
|
||||
"--dry-run", action="store_true", help="Show what would be created without saving files"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
@@ -538,9 +538,13 @@ def _validate_patterns(patterns: dict[str, list[tuple[str, int]]]) -> None:
|
||||
raise ValueError(f"Pattern {i} for '{lang}' is not a (regex, weight) tuple: {item}")
|
||||
pattern, weight = item
|
||||
if not isinstance(pattern, str):
|
||||
raise ValueError(f"Pattern {i} for '{lang}': regex must be a string, got {type(pattern).__name__}")
|
||||
raise ValueError(
|
||||
f"Pattern {i} for '{lang}': regex must be a string, got {type(pattern).__name__}"
|
||||
)
|
||||
if not isinstance(weight, int) or weight < 1 or weight > 5:
|
||||
raise ValueError(f"Pattern {i} for '{lang}': weight must be int 1-5, got {weight!r}")
|
||||
raise ValueError(
|
||||
f"Pattern {i} for '{lang}': weight must be int 1-5, got {weight!r}"
|
||||
)
|
||||
|
||||
|
||||
# Validate patterns at module load time
|
||||
|
||||
@@ -251,7 +251,9 @@ class PythonTestAnalyzer:
|
||||
# Process each test method
|
||||
for node in class_node.body:
|
||||
if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
|
||||
examples.extend(self._analyze_test_body(node, file_path, imports, setup_code=setup_code))
|
||||
examples.extend(
|
||||
self._analyze_test_body(node, file_path, imports, setup_code=setup_code)
|
||||
)
|
||||
|
||||
return examples
|
||||
|
||||
@@ -283,7 +285,11 @@ class PythonTestAnalyzer:
|
||||
return None
|
||||
|
||||
def _analyze_test_body(
|
||||
self, func_node: ast.FunctionDef, file_path: str, imports: list[str], setup_code: str | None = None
|
||||
self,
|
||||
func_node: ast.FunctionDef,
|
||||
file_path: str,
|
||||
imports: list[str],
|
||||
setup_code: str | None = None,
|
||||
) -> list[TestExample]:
|
||||
"""Analyze test function body for extractable patterns"""
|
||||
examples = []
|
||||
@@ -297,7 +303,9 @@ class PythonTestAnalyzer:
|
||||
# Extract different pattern categories
|
||||
|
||||
# 1. Instantiation patterns
|
||||
instantiations = self._find_instantiations(func_node, file_path, docstring, setup_code, tags, imports)
|
||||
instantiations = self._find_instantiations(
|
||||
func_node, file_path, docstring, setup_code, tags, imports
|
||||
)
|
||||
examples.extend(instantiations)
|
||||
|
||||
# 2. Method calls with assertions
|
||||
@@ -307,7 +315,9 @@ class PythonTestAnalyzer:
|
||||
examples.extend(method_calls)
|
||||
|
||||
# 3. Configuration dictionaries
|
||||
configs = self._find_config_dicts(func_node, file_path, docstring, setup_code, tags, imports)
|
||||
configs = self._find_config_dicts(
|
||||
func_node, file_path, docstring, setup_code, tags, imports
|
||||
)
|
||||
examples.extend(configs)
|
||||
|
||||
# 4. Multi-step workflows (integration tests)
|
||||
@@ -707,7 +717,13 @@ class GenericTestAnalyzer:
|
||||
return examples
|
||||
|
||||
def _create_example(
|
||||
self, test_name: str, category: str, code: str, language: str, file_path: str, line_number: int
|
||||
self,
|
||||
test_name: str,
|
||||
category: str,
|
||||
code: str,
|
||||
language: str,
|
||||
file_path: str,
|
||||
line_number: int,
|
||||
) -> TestExample:
|
||||
"""Create TestExample from regex match"""
|
||||
return TestExample(
|
||||
@@ -891,7 +907,9 @@ class TestExampleExtractor:
|
||||
# Limit per file
|
||||
if len(filtered_examples) > self.max_per_file:
|
||||
# Sort by confidence and take top N
|
||||
filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[: self.max_per_file]
|
||||
filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[
|
||||
: self.max_per_file
|
||||
]
|
||||
|
||||
logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}")
|
||||
|
||||
@@ -915,7 +933,10 @@ class TestExampleExtractor:
|
||||
return self.LANGUAGE_MAP.get(suffix, "Unknown")
|
||||
|
||||
def _create_report(
|
||||
self, examples: list[TestExample], file_path: str | None = None, directory: str | None = None
|
||||
self,
|
||||
examples: list[TestExample],
|
||||
file_path: str | None = None,
|
||||
directory: str | None = None,
|
||||
) -> ExampleReport:
|
||||
"""Create summary report from examples"""
|
||||
# Enhance examples with AI analysis (C3.6)
|
||||
@@ -932,15 +953,21 @@ class TestExampleExtractor:
|
||||
# Count by category
|
||||
examples_by_category = {}
|
||||
for example in examples:
|
||||
examples_by_category[example.category] = examples_by_category.get(example.category, 0) + 1
|
||||
examples_by_category[example.category] = (
|
||||
examples_by_category.get(example.category, 0) + 1
|
||||
)
|
||||
|
||||
# Count by language
|
||||
examples_by_language = {}
|
||||
for example in examples:
|
||||
examples_by_language[example.language] = examples_by_language.get(example.language, 0) + 1
|
||||
examples_by_language[example.language] = (
|
||||
examples_by_language.get(example.language, 0) + 1
|
||||
)
|
||||
|
||||
# Calculate averages
|
||||
avg_complexity = sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
|
||||
avg_complexity = (
|
||||
sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
|
||||
)
|
||||
high_value_count = sum(1 for ex in examples if ex.confidence > 0.7)
|
||||
|
||||
return ExampleReport(
|
||||
@@ -983,15 +1010,25 @@ Examples:
|
||||
|
||||
parser.add_argument("directory", nargs="?", help="Directory containing test files")
|
||||
parser.add_argument("--file", help="Single test file to analyze")
|
||||
parser.add_argument("--language", help="Filter by programming language (python, javascript, etc.)")
|
||||
parser.add_argument(
|
||||
"--min-confidence", type=float, default=0.5, help="Minimum confidence threshold (0.0-1.0, default: 0.5)"
|
||||
"--language", help="Filter by programming language (python, javascript, etc.)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--min-confidence",
|
||||
type=float,
|
||||
default=0.5,
|
||||
help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)"
|
||||
)
|
||||
parser.add_argument("--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)")
|
||||
parser.add_argument("--json", action="store_true", help="Output JSON format")
|
||||
parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
|
||||
parser.add_argument(
|
||||
"--recursive", action="store_true", default=True, help="Search directory recursively (default: True)"
|
||||
"--recursive",
|
||||
action="store_true",
|
||||
default=True,
|
||||
help="Search directory recursively (default: True)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -21,7 +21,12 @@ def test_validate_existing_unified_configs():
|
||||
"""Test that all existing unified configs are valid"""
|
||||
configs_dir = Path(__file__).parent.parent / "configs"
|
||||
|
||||
unified_configs = ["godot_unified.json", "react_unified.json", "django_unified.json", "fastapi_unified.json"]
|
||||
unified_configs = [
|
||||
"godot_unified.json",
|
||||
"react_unified.json",
|
||||
"django_unified.json",
|
||||
"fastapi_unified.json",
|
||||
]
|
||||
|
||||
for config_name in unified_configs:
|
||||
config_path = configs_dir / config_name
|
||||
@@ -56,8 +61,18 @@ def test_create_temp_unified_config():
|
||||
"description": "Test unified config",
|
||||
"merge_mode": "rule-based",
|
||||
"sources": [
|
||||
{"type": "documentation", "base_url": "https://example.com/docs", "extract_api": True, "max_pages": 50},
|
||||
{"type": "github", "repo": "test/repo", "include_code": True, "code_analysis_depth": "surface"},
|
||||
{
|
||||
"type": "documentation",
|
||||
"base_url": "https://example.com/docs",
|
||||
"extract_api": True,
|
||||
"max_pages": 50,
|
||||
},
|
||||
{
|
||||
"type": "github",
|
||||
"repo": "test/repo",
|
||||
"include_code": True,
|
||||
"code_analysis_depth": "surface",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -69,7 +69,11 @@ class UnifiedCodebaseAnalyzer:
|
||||
self.github_token = github_token or os.getenv("GITHUB_TOKEN")
|
||||
|
||||
def analyze(
|
||||
self, source: str, depth: str = "c3x", fetch_github_metadata: bool = True, output_dir: Path | None = None
|
||||
self,
|
||||
source: str,
|
||||
depth: str = "c3x",
|
||||
fetch_github_metadata: bool = True,
|
||||
output_dir: Path | None = None,
|
||||
) -> AnalysisResult:
|
||||
"""
|
||||
Analyze codebase with specified depth.
|
||||
@@ -123,7 +127,9 @@ class UnifiedCodebaseAnalyzer:
|
||||
raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
|
||||
|
||||
# Build result with all streams
|
||||
result = AnalysisResult(code_analysis=code_analysis, source_type="github", analysis_depth=depth)
|
||||
result = AnalysisResult(
|
||||
code_analysis=code_analysis, source_type="github", analysis_depth=depth
|
||||
)
|
||||
|
||||
# Add GitHub-specific data if available
|
||||
if fetch_metadata:
|
||||
@@ -168,7 +174,9 @@ class UnifiedCodebaseAnalyzer:
|
||||
else:
|
||||
raise ValueError(f"Unknown depth: {depth}. Use 'basic' or 'c3x'")
|
||||
|
||||
return AnalysisResult(code_analysis=code_analysis, source_type="local", analysis_depth=depth)
|
||||
return AnalysisResult(
|
||||
code_analysis=code_analysis, source_type="local", analysis_depth=depth
|
||||
)
|
||||
|
||||
def basic_analysis(self, directory: Path) -> dict:
|
||||
"""
|
||||
@@ -423,7 +431,9 @@ class UnifiedCodebaseAnalyzer:
|
||||
# Only include immediate subdirectories
|
||||
structure["children"].append({"name": item.name, "type": "directory"})
|
||||
elif item.is_file():
|
||||
structure["children"].append({"name": item.name, "type": "file", "extension": item.suffix})
|
||||
structure["children"].append(
|
||||
{"name": item.name, "type": "file", "extension": item.suffix}
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -406,7 +406,13 @@ class UnifiedScraper:
|
||||
|
||||
# Append to list instead of overwriting (multi-source support)
|
||||
self.scraped_data["github"].append(
|
||||
{"repo": repo, "repo_id": repo_id, "idx": idx, "data": github_data, "data_file": github_data_file}
|
||||
{
|
||||
"repo": repo,
|
||||
"repo_id": repo_id,
|
||||
"idx": idx,
|
||||
"data": github_data,
|
||||
"data_file": github_data_file,
|
||||
}
|
||||
)
|
||||
|
||||
# Build standalone SKILL.md for synthesis using GitHubToSkillConverter
|
||||
@@ -433,7 +439,9 @@ class UnifiedScraper:
|
||||
logger.info(f"📦 Moved GitHub output to cache: {cache_github_dir}")
|
||||
|
||||
if os.path.exists(github_data_file_path):
|
||||
cache_github_data = os.path.join(self.data_dir, f"{github_config['name']}_github_data.json")
|
||||
cache_github_data = os.path.join(
|
||||
self.data_dir, f"{github_config['name']}_github_data.json"
|
||||
)
|
||||
if os.path.exists(cache_github_data):
|
||||
os.remove(cache_github_data)
|
||||
shutil.move(github_data_file_path, cache_github_data)
|
||||
@@ -478,7 +486,13 @@ class UnifiedScraper:
|
||||
|
||||
# Append to list instead of overwriting
|
||||
self.scraped_data["pdf"].append(
|
||||
{"pdf_path": pdf_path, "pdf_id": pdf_id, "idx": idx, "data": pdf_data, "data_file": pdf_data_file}
|
||||
{
|
||||
"pdf_path": pdf_path,
|
||||
"pdf_id": pdf_id,
|
||||
"idx": idx,
|
||||
"data": pdf_data,
|
||||
"data_file": pdf_data_file,
|
||||
}
|
||||
)
|
||||
|
||||
# Build standalone SKILL.md for synthesis
|
||||
@@ -611,12 +625,20 @@ class UnifiedScraper:
|
||||
# Load C3.x outputs into memory
|
||||
c3_data = {
|
||||
"patterns": self._load_json(temp_output / "patterns" / "detected_patterns.json"),
|
||||
"test_examples": self._load_json(temp_output / "test_examples" / "test_examples.json"),
|
||||
"test_examples": self._load_json(
|
||||
temp_output / "test_examples" / "test_examples.json"
|
||||
),
|
||||
"how_to_guides": self._load_guide_collection(temp_output / "tutorials"),
|
||||
"config_patterns": self._load_json(temp_output / "config_patterns" / "config_patterns.json"),
|
||||
"architecture": self._load_json(temp_output / "architecture" / "architectural_patterns.json"),
|
||||
"config_patterns": self._load_json(
|
||||
temp_output / "config_patterns" / "config_patterns.json"
|
||||
),
|
||||
"architecture": self._load_json(
|
||||
temp_output / "architecture" / "architectural_patterns.json"
|
||||
),
|
||||
"api_reference": self._load_api_reference(temp_output / "api_reference"), # C2.5
|
||||
"dependency_graph": self._load_json(temp_output / "dependencies" / "dependency_graph.json"), # C2.6
|
||||
"dependency_graph": self._load_json(
|
||||
temp_output / "dependencies" / "dependency_graph.json"
|
||||
), # C2.6
|
||||
}
|
||||
|
||||
# Log summary
|
||||
@@ -769,7 +791,9 @@ class UnifiedScraper:
|
||||
conflicts = conflicts_data.get("conflicts", [])
|
||||
|
||||
# Build skill
|
||||
builder = UnifiedSkillBuilder(self.config, self.scraped_data, merged_data, conflicts, cache_dir=self.cache_dir)
|
||||
builder = UnifiedSkillBuilder(
|
||||
self.config, self.scraped_data, merged_data, conflicts, cache_dir=self.cache_dir
|
||||
)
|
||||
|
||||
builder.build()
|
||||
|
||||
@@ -836,7 +860,10 @@ Examples:
|
||||
|
||||
parser.add_argument("--config", "-c", required=True, help="Path to unified config JSON file")
|
||||
parser.add_argument(
|
||||
"--merge-mode", "-m", choices=["rule-based", "claude-enhanced"], help="Override config merge mode"
|
||||
"--merge-mode",
|
||||
"-m",
|
||||
choices=["rule-based", "claude-enhanced"],
|
||||
help="Override config merge mode",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skip-codebase-analysis",
|
||||
@@ -854,7 +881,9 @@ Examples:
|
||||
for source in scraper.config.get("sources", []):
|
||||
if source["type"] == "github":
|
||||
source["enable_codebase_analysis"] = False
|
||||
logger.info(f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}")
|
||||
logger.info(
|
||||
f"⏭️ Skipping codebase analysis for GitHub source: {source.get('repo', 'unknown')}"
|
||||
)
|
||||
|
||||
# Run scraper
|
||||
scraper.run()
|
||||
|
||||
@@ -97,7 +97,9 @@ class UnifiedSkillBuilder:
|
||||
if docs_skill_path.exists():
|
||||
try:
|
||||
skill_mds["documentation"] = docs_skill_path.read_text(encoding="utf-8")
|
||||
logger.debug(f"Loaded documentation SKILL.md ({len(skill_mds['documentation'])} chars)")
|
||||
logger.debug(
|
||||
f"Loaded documentation SKILL.md ({len(skill_mds['documentation'])} chars)"
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to read documentation SKILL.md: {e}")
|
||||
|
||||
@@ -109,7 +111,9 @@ class UnifiedSkillBuilder:
|
||||
try:
|
||||
content = github_skill_path.read_text(encoding="utf-8")
|
||||
github_sources.append(content)
|
||||
logger.debug(f"Loaded GitHub SKILL.md from {github_dir.name} ({len(content)} chars)")
|
||||
logger.debug(
|
||||
f"Loaded GitHub SKILL.md from {github_dir.name} ({len(content)} chars)"
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to read GitHub SKILL.md from {github_dir.name}: {e}")
|
||||
|
||||
@@ -165,7 +169,23 @@ class UnifiedSkillBuilder:
|
||||
current_section = line[3:].strip()
|
||||
# Remove emoji and markdown formatting
|
||||
current_section = current_section.split("](")[0] # Remove links
|
||||
for emoji in ["📚", "🏗️", "⚠️", "🔧", "📖", "💡", "🎯", "📊", "🔍", "⚙️", "🧪", "📝", "🗂️", "📐", "⚡"]:
|
||||
for emoji in [
|
||||
"📚",
|
||||
"🏗️",
|
||||
"⚠️",
|
||||
"🔧",
|
||||
"📖",
|
||||
"💡",
|
||||
"🎯",
|
||||
"📊",
|
||||
"🔍",
|
||||
"⚙️",
|
||||
"🧪",
|
||||
"📝",
|
||||
"🗂️",
|
||||
"📐",
|
||||
"⚡",
|
||||
]:
|
||||
current_section = current_section.replace(emoji, "").strip()
|
||||
current_content = []
|
||||
elif current_section:
|
||||
@@ -268,7 +288,9 @@ This skill synthesizes knowledge from multiple sources:
|
||||
|
||||
if "Quick Reference" in github_sections:
|
||||
# Include GitHub's Quick Reference (contains design patterns summary)
|
||||
logger.info(f"DEBUG: Including GitHub Quick Reference ({len(github_sections['Quick Reference'])} chars)")
|
||||
logger.info(
|
||||
f"DEBUG: Including GitHub Quick Reference ({len(github_sections['Quick Reference'])} chars)"
|
||||
)
|
||||
content += github_sections["Quick Reference"] + "\n\n"
|
||||
else:
|
||||
logger.warning("DEBUG: GitHub Quick Reference section NOT FOUND!")
|
||||
@@ -330,7 +352,9 @@ This skill synthesizes knowledge from multiple sources:
|
||||
|
||||
# Footer
|
||||
content += "---\n\n"
|
||||
content += "*Synthesized from official documentation and codebase analysis by Skill Seekers*\n"
|
||||
content += (
|
||||
"*Synthesized from official documentation and codebase analysis by Skill Seekers*\n"
|
||||
)
|
||||
|
||||
return content
|
||||
|
||||
@@ -602,7 +626,9 @@ This skill combines knowledge from multiple sources:
|
||||
# Count by type
|
||||
by_type = {}
|
||||
for conflict in self.conflicts:
|
||||
ctype = conflict.type if hasattr(conflict, "type") else conflict.get("type", "unknown")
|
||||
ctype = (
|
||||
conflict.type if hasattr(conflict, "type") else conflict.get("type", "unknown")
|
||||
)
|
||||
by_type[ctype] = by_type.get(ctype, 0) + 1
|
||||
|
||||
content += "**Conflict Breakdown:**\n"
|
||||
@@ -836,7 +862,9 @@ This skill combines knowledge from multiple sources:
|
||||
source_id = doc_source.get("source_id", "unknown")
|
||||
base_url = doc_source.get("base_url", "Unknown")
|
||||
total_pages = doc_source.get("total_pages", "N/A")
|
||||
f.write(f"- [{source_id}]({source_id}/index.md) - {base_url} ({total_pages} pages)\n")
|
||||
f.write(
|
||||
f"- [{source_id}]({source_id}/index.md) - {base_url} ({total_pages} pages)\n"
|
||||
)
|
||||
|
||||
logger.info(f"Created documentation references ({len(docs_list)} sources)")
|
||||
|
||||
@@ -1084,9 +1112,13 @@ This skill combines knowledge from multiple sources:
|
||||
pattern_summary[ptype] = pattern_summary.get(ptype, 0) + 1
|
||||
|
||||
if pattern_summary:
|
||||
for ptype, count in sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True):
|
||||
for ptype, count in sorted(
|
||||
pattern_summary.items(), key=lambda x: x[1], reverse=True
|
||||
):
|
||||
f.write(f"- **{ptype}**: {count} instance(s)\n")
|
||||
f.write("\n📁 See `references/codebase_analysis/patterns/` for detailed analysis.\n\n")
|
||||
f.write(
|
||||
"\n📁 See `references/codebase_analysis/patterns/` for detailed analysis.\n\n"
|
||||
)
|
||||
else:
|
||||
f.write("*No design patterns detected.*\n\n")
|
||||
|
||||
@@ -1115,7 +1147,9 @@ This skill combines knowledge from multiple sources:
|
||||
f.write("\n**Recommended Actions**:\n")
|
||||
for action in insights["recommended_actions"][:5]:
|
||||
f.write(f"- {action}\n")
|
||||
f.write("\n📁 See `references/codebase_analysis/configuration/` for details.\n\n")
|
||||
f.write(
|
||||
"\n📁 See `references/codebase_analysis/configuration/` for details.\n\n"
|
||||
)
|
||||
else:
|
||||
f.write("*No configuration files detected.*\n\n")
|
||||
|
||||
@@ -1128,7 +1162,9 @@ This skill combines knowledge from multiple sources:
|
||||
f.write(f"**{len(guides)} how-to guide(s) extracted from codebase**:\n\n")
|
||||
for guide in guides[:10]: # Top 10
|
||||
f.write(f"- {guide.get('title', 'Untitled Guide')}\n")
|
||||
f.write("\n📁 See `references/codebase_analysis/guides/` for detailed tutorials.\n\n")
|
||||
f.write(
|
||||
"\n📁 See `references/codebase_analysis/guides/` for detailed tutorials.\n\n"
|
||||
)
|
||||
else:
|
||||
f.write("*No workflow guides extracted.*\n\n")
|
||||
|
||||
@@ -1147,11 +1183,15 @@ This skill combines knowledge from multiple sources:
|
||||
if examples.get("examples_by_category"):
|
||||
f.write("\n**By Category**:\n")
|
||||
for cat, count in sorted(
|
||||
examples["examples_by_category"].items(), key=lambda x: x[1], reverse=True
|
||||
examples["examples_by_category"].items(),
|
||||
key=lambda x: x[1],
|
||||
reverse=True,
|
||||
):
|
||||
f.write(f"- {cat}: {count}\n")
|
||||
|
||||
f.write("\n📁 See `references/codebase_analysis/examples/` for code samples.\n\n")
|
||||
f.write(
|
||||
"\n📁 See `references/codebase_analysis/examples/` for code samples.\n\n"
|
||||
)
|
||||
else:
|
||||
f.write("*No test examples extracted.*\n\n")
|
||||
|
||||
@@ -1163,13 +1203,17 @@ This skill combines knowledge from multiple sources:
|
||||
dir_struct = c3_data["architecture"].get("directory_structure", {})
|
||||
if dir_struct:
|
||||
f.write("**Main Directories**:\n")
|
||||
for dir_name, file_count in sorted(dir_struct.items(), key=lambda x: x[1], reverse=True)[:15]:
|
||||
for dir_name, file_count in sorted(
|
||||
dir_struct.items(), key=lambda x: x[1], reverse=True
|
||||
)[:15]:
|
||||
f.write(f"- `{dir_name}/`: {file_count} file(s)\n")
|
||||
f.write("\n")
|
||||
|
||||
# Footer
|
||||
f.write("---\n\n")
|
||||
f.write("*This architecture overview was automatically generated by C3.x codebase analysis.*\n")
|
||||
f.write(
|
||||
"*This architecture overview was automatically generated by C3.x codebase analysis.*\n"
|
||||
)
|
||||
f.write("*Last updated: skill build time*\n")
|
||||
|
||||
logger.info("📐 Created ARCHITECTURE.md")
|
||||
@@ -1277,7 +1321,9 @@ This skill combines knowledge from multiple sources:
|
||||
if guides:
|
||||
f.write("## Available Guides\n\n")
|
||||
for guide in guides:
|
||||
f.write(f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n")
|
||||
f.write(
|
||||
f"- [{guide.get('title', 'Untitled')}](guide_{guide.get('id', 'unknown')}.md)\n"
|
||||
)
|
||||
f.write("\n")
|
||||
|
||||
# Save individual guide markdown files
|
||||
@@ -1351,7 +1397,9 @@ This skill combines knowledge from multiple sources:
|
||||
if insights:
|
||||
f.write("## Overall Insights\n\n")
|
||||
if insights.get("security_issues_found"):
|
||||
f.write(f"🔐 **Security Issues**: {insights['security_issues_found']}\n\n")
|
||||
f.write(
|
||||
f"🔐 **Security Issues**: {insights['security_issues_found']}\n\n"
|
||||
)
|
||||
if insights.get("recommended_actions"):
|
||||
f.write("**Recommended Actions**:\n")
|
||||
for action in insights["recommended_actions"]:
|
||||
@@ -1425,7 +1473,9 @@ This skill combines knowledge from multiple sources:
|
||||
|
||||
top_patterns = sorted(pattern_summary.items(), key=lambda x: x[1], reverse=True)[:3]
|
||||
if top_patterns:
|
||||
content += f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n"
|
||||
content += (
|
||||
f"- Top patterns: {', '.join([f'{p[0]} ({p[1]})' for p in top_patterns])}\n"
|
||||
)
|
||||
content += "\n"
|
||||
|
||||
# Add test examples summary
|
||||
@@ -1449,7 +1499,9 @@ This skill combines knowledge from multiple sources:
|
||||
|
||||
# Add security warning if present
|
||||
if c3_data["config_patterns"].get("ai_enhancements"):
|
||||
insights = c3_data["config_patterns"]["ai_enhancements"].get("overall_insights", {})
|
||||
insights = c3_data["config_patterns"]["ai_enhancements"].get(
|
||||
"overall_insights", {}
|
||||
)
|
||||
security_issues = insights.get("security_issues_found", 0)
|
||||
if security_issues > 0:
|
||||
content += f"- 🔐 **Security Alert**: {security_issues} issue(s) detected\n"
|
||||
@@ -1477,7 +1529,8 @@ This skill combines knowledge from multiple sources:
|
||||
medium = [
|
||||
c
|
||||
for c in self.conflicts
|
||||
if (hasattr(c, "severity") and c.severity == "medium") or c.get("severity") == "medium"
|
||||
if (hasattr(c, "severity") and c.severity == "medium")
|
||||
or c.get("severity") == "medium"
|
||||
]
|
||||
low = [
|
||||
c
|
||||
@@ -1497,9 +1550,15 @@ This skill combines knowledge from multiple sources:
|
||||
|
||||
for conflict in high:
|
||||
api_name = (
|
||||
conflict.api_name if hasattr(conflict, "api_name") else conflict.get("api_name", "Unknown")
|
||||
conflict.api_name
|
||||
if hasattr(conflict, "api_name")
|
||||
else conflict.get("api_name", "Unknown")
|
||||
)
|
||||
diff = (
|
||||
conflict.difference
|
||||
if hasattr(conflict, "difference")
|
||||
else conflict.get("difference", "N/A")
|
||||
)
|
||||
diff = conflict.difference if hasattr(conflict, "difference") else conflict.get("difference", "N/A")
|
||||
|
||||
f.write(f"### {api_name}\n\n")
|
||||
f.write(f"**Issue**: {diff}\n\n")
|
||||
@@ -1510,9 +1569,15 @@ This skill combines knowledge from multiple sources:
|
||||
|
||||
for conflict in medium[:20]: # Limit to 20
|
||||
api_name = (
|
||||
conflict.api_name if hasattr(conflict, "api_name") else conflict.get("api_name", "Unknown")
|
||||
conflict.api_name
|
||||
if hasattr(conflict, "api_name")
|
||||
else conflict.get("api_name", "Unknown")
|
||||
)
|
||||
diff = (
|
||||
conflict.difference
|
||||
if hasattr(conflict, "difference")
|
||||
else conflict.get("difference", "N/A")
|
||||
)
|
||||
diff = conflict.difference if hasattr(conflict, "difference") else conflict.get("difference", "N/A")
|
||||
|
||||
f.write(f"### {api_name}\n\n")
|
||||
f.write(f"{diff}\n\n")
|
||||
@@ -1534,7 +1599,9 @@ if __name__ == "__main__":
|
||||
config = json.load(f)
|
||||
|
||||
# Mock scraped data
|
||||
scraped_data = {"github": {"data": {"readme": "# Test Repository", "issues": [], "releases": []}}}
|
||||
scraped_data = {
|
||||
"github": {"data": {"readme": "# Test Repository", "issues": [], "releases": []}}
|
||||
}
|
||||
|
||||
builder = UnifiedSkillBuilder(config, scraped_data)
|
||||
builder.build()
|
||||
|
||||
@@ -179,7 +179,9 @@ def validate_zip_file(zip_path: str | Path) -> tuple[bool, str | None]:
|
||||
return True, None
|
||||
|
||||
|
||||
def read_reference_files(skill_dir: str | Path, max_chars: int = 100000, preview_limit: int = 40000) -> dict[str, dict]:
|
||||
def read_reference_files(
|
||||
skill_dir: str | Path, max_chars: int = 100000, preview_limit: int = 40000
|
||||
) -> dict[str, dict]:
|
||||
"""Read reference files from a skill directory with enriched metadata.
|
||||
|
||||
This function reads markdown files from the references/ subdirectory
|
||||
@@ -319,7 +321,10 @@ def read_reference_files(skill_dir: str | Path, max_chars: int = 100000, preview
|
||||
|
||||
|
||||
def retry_with_backoff(
|
||||
operation: Callable[[], T], max_attempts: int = 3, base_delay: float = 1.0, operation_name: str = "operation"
|
||||
operation: Callable[[], T],
|
||||
max_attempts: int = 3,
|
||||
base_delay: float = 1.0,
|
||||
operation_name: str = "operation",
|
||||
) -> T:
|
||||
"""Retry an operation with exponential backoff.
|
||||
|
||||
@@ -355,7 +360,12 @@ def retry_with_backoff(
|
||||
if attempt < max_attempts:
|
||||
delay = base_delay * (2 ** (attempt - 1))
|
||||
logger.warning(
|
||||
"%s failed (attempt %d/%d), retrying in %.1fs: %s", operation_name, attempt, max_attempts, delay, e
|
||||
"%s failed (attempt %d/%d), retrying in %.1fs: %s",
|
||||
operation_name,
|
||||
attempt,
|
||||
max_attempts,
|
||||
delay,
|
||||
e,
|
||||
)
|
||||
time.sleep(delay)
|
||||
else:
|
||||
@@ -368,7 +378,10 @@ def retry_with_backoff(
|
||||
|
||||
|
||||
async def retry_with_backoff_async(
|
||||
operation: Callable[[], T], max_attempts: int = 3, base_delay: float = 1.0, operation_name: str = "operation"
|
||||
operation: Callable[[], T],
|
||||
max_attempts: int = 3,
|
||||
base_delay: float = 1.0,
|
||||
operation_name: str = "operation",
|
||||
) -> T:
|
||||
"""Async version of retry_with_backoff for async operations.
|
||||
|
||||
@@ -403,7 +416,12 @@ async def retry_with_backoff_async(
|
||||
if attempt < max_attempts:
|
||||
delay = base_delay * (2 ** (attempt - 1))
|
||||
logger.warning(
|
||||
"%s failed (attempt %d/%d), retrying in %.1fs: %s", operation_name, attempt, max_attempts, delay, e
|
||||
"%s failed (attempt %d/%d), retrying in %.1fs: %s",
|
||||
operation_name,
|
||||
attempt,
|
||||
max_attempts,
|
||||
delay,
|
||||
e,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
else:
|
||||
|
||||
@@ -138,7 +138,9 @@ class AgentDetector:
|
||||
return None
|
||||
return self.AGENT_CONFIG[agent_id]["transport"]
|
||||
|
||||
def generate_config(self, agent_id: str, server_command: str, http_port: int | None = 3000) -> str | None:
|
||||
def generate_config(
|
||||
self, agent_id: str, server_command: str, http_port: int | None = 3000
|
||||
) -> str | None:
|
||||
"""
|
||||
Generate MCP configuration for a specific agent.
|
||||
|
||||
@@ -282,7 +284,9 @@ def detect_agents() -> list[dict[str, str]]:
|
||||
return detector.detect_agents()
|
||||
|
||||
|
||||
def generate_config(agent_name: str, server_command: str = "skill-seekers mcp", http_port: int = 3000) -> str | None:
|
||||
def generate_config(
|
||||
agent_name: str, server_command: str = "skill-seekers mcp", http_port: int = 3000
|
||||
) -> str | None:
|
||||
"""
|
||||
Convenience function to generate config for a specific agent.
|
||||
|
||||
|
||||
@@ -118,7 +118,8 @@ class GitConfigRepo:
|
||||
) from e
|
||||
elif "not found" in error_msg.lower() or "404" in error_msg:
|
||||
raise GitCommandError(
|
||||
f"Repository not found: {git_url}. Verify the URL is correct and you have access.", 128
|
||||
f"Repository not found: {git_url}. Verify the URL is correct and you have access.",
|
||||
128,
|
||||
) from e
|
||||
else:
|
||||
raise GitCommandError(f"Failed to clone repository: {error_msg}", 128) from e
|
||||
|
||||
@@ -139,14 +139,20 @@ try:
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
),
|
||||
Tool(
|
||||
name="scrape_docs", description="Scrape documentation", inputSchema={"type": "object", "properties": {}}
|
||||
name="scrape_docs",
|
||||
description="Scrape documentation",
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
),
|
||||
Tool(
|
||||
name="scrape_github",
|
||||
description="Scrape GitHub repository",
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
),
|
||||
Tool(name="scrape_pdf", description="Scrape PDF file", inputSchema={"type": "object", "properties": {}}),
|
||||
Tool(
|
||||
name="scrape_pdf",
|
||||
description="Scrape PDF file",
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
),
|
||||
Tool(
|
||||
name="package_skill",
|
||||
description="Package skill into .zip",
|
||||
@@ -157,9 +163,15 @@ try:
|
||||
description="Upload skill to Claude",
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
),
|
||||
Tool(name="install_skill", description="Install skill", inputSchema={"type": "object", "properties": {}}),
|
||||
Tool(
|
||||
name="split_config", description="Split large config", inputSchema={"type": "object", "properties": {}}
|
||||
name="install_skill",
|
||||
description="Install skill",
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
),
|
||||
Tool(
|
||||
name="split_config",
|
||||
description="Split large config",
|
||||
inputSchema={"type": "object", "properties": {}},
|
||||
),
|
||||
Tool(
|
||||
name="generate_router",
|
||||
|
||||
@@ -726,7 +726,13 @@ async def estimate_pages_tool(args: dict) -> list[TextContent]:
|
||||
timeout = max(300, max_discovery // 2) # Minimum 5 minutes
|
||||
|
||||
# Run estimate_pages.py
|
||||
cmd = [sys.executable, str(CLI_DIR / "estimate_pages.py"), config_path, "--max-discovery", str(max_discovery)]
|
||||
cmd = [
|
||||
sys.executable,
|
||||
str(CLI_DIR / "estimate_pages.py"),
|
||||
config_path,
|
||||
"--max-discovery",
|
||||
str(max_discovery),
|
||||
]
|
||||
|
||||
progress_msg = "🔄 Estimating page count...\n"
|
||||
progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
|
||||
@@ -980,7 +986,9 @@ async def validate_config_tool(args: dict) -> list[TextContent]:
|
||||
try:
|
||||
# Check if file exists
|
||||
if not Path(config_path).exists():
|
||||
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
|
||||
]
|
||||
|
||||
# Try unified config validator first
|
||||
try:
|
||||
@@ -1004,7 +1012,9 @@ async def validate_config_tool(args: dict) -> list[TextContent]:
|
||||
result += f" Max pages: {source.get('max_pages', 'Not set')}\n"
|
||||
elif source["type"] == "github":
|
||||
result += f" Repo: {source.get('repo', 'N/A')}\n"
|
||||
result += f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
|
||||
result += (
|
||||
f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
|
||||
)
|
||||
elif source["type"] == "pdf":
|
||||
result += f" Path: {source.get('path', 'N/A')}\n"
|
||||
|
||||
@@ -1106,7 +1116,9 @@ async def generate_router_tool(args: dict) -> list[TextContent]:
|
||||
config_files = glob.glob(config_pattern)
|
||||
|
||||
if not config_files:
|
||||
return [TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")]
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")
|
||||
]
|
||||
|
||||
# Run generate_router.py
|
||||
cmd = [
|
||||
@@ -1159,7 +1171,11 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
|
||||
cmd.extend(["--from-json", from_json])
|
||||
|
||||
else:
|
||||
return [TextContent(type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json"
|
||||
)
|
||||
]
|
||||
|
||||
# Run pdf_scraper.py with streaming (can take a while)
|
||||
timeout = 600 # 10 minutes for PDF extraction
|
||||
@@ -1257,7 +1273,12 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
|
||||
# MODE 1: Named Source (highest priority)
|
||||
if source_name:
|
||||
if not config_name:
|
||||
return [TextContent(type="text", text="❌ Error: config_name is required when using source parameter")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: config_name is required when using source parameter",
|
||||
)
|
||||
]
|
||||
|
||||
# Get source from registry
|
||||
source_manager = SourceManager()
|
||||
@@ -1278,7 +1299,11 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
|
||||
git_repo = GitConfigRepo()
|
||||
try:
|
||||
repo_path = git_repo.clone_or_pull(
|
||||
source_name=source_name, git_url=git_url, branch=branch, token=token, force_refresh=force_refresh
|
||||
source_name=source_name,
|
||||
git_url=git_url,
|
||||
branch=branch,
|
||||
token=token,
|
||||
force_refresh=force_refresh,
|
||||
)
|
||||
except Exception as e:
|
||||
return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]
|
||||
@@ -1320,7 +1345,12 @@ Next steps:
|
||||
# MODE 2: Direct Git URL
|
||||
elif git_url:
|
||||
if not config_name:
|
||||
return [TextContent(type="text", text="❌ Error: config_name is required when using git_url parameter")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: config_name is required when using git_url parameter",
|
||||
)
|
||||
]
|
||||
|
||||
# Clone/pull repository
|
||||
git_repo = GitConfigRepo()
|
||||
@@ -1418,7 +1448,9 @@ Next steps:
|
||||
if tags:
|
||||
result += f" Tags: {tags}\n"
|
||||
|
||||
result += "\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
|
||||
result += (
|
||||
"\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
|
||||
)
|
||||
result += f"📚 API Docs: {API_BASE_URL}/docs\n"
|
||||
|
||||
return [TextContent(type="text", text=result)]
|
||||
@@ -1426,7 +1458,10 @@ Next steps:
|
||||
# Download specific config
|
||||
if not config_name:
|
||||
return [
|
||||
TextContent(type="text", text="❌ Error: Please provide config_name or set list_available=true")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: Please provide config_name or set list_available=true",
|
||||
)
|
||||
]
|
||||
|
||||
# Get config details first
|
||||
@@ -1486,11 +1521,14 @@ Next steps:
|
||||
except httpx.HTTPError as e:
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later."
|
||||
type="text",
|
||||
text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later.",
|
||||
)
|
||||
]
|
||||
except json.JSONDecodeError as e:
|
||||
return [TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")]
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")
|
||||
]
|
||||
except Exception as e:
|
||||
return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
|
||||
|
||||
@@ -1575,7 +1613,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
|
||||
if not dry_run:
|
||||
# Call fetch_config_tool directly
|
||||
fetch_result = await fetch_config_tool({"config_name": config_name, "destination": destination})
|
||||
fetch_result = await fetch_config_tool(
|
||||
{"config_name": config_name, "destination": destination}
|
||||
)
|
||||
|
||||
# Parse result to extract config path
|
||||
fetch_output = fetch_result[0].text
|
||||
@@ -1589,7 +1629,12 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
workflow_state["config_path"] = match.group(1).strip()
|
||||
output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}")
|
||||
else:
|
||||
return [TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Failed to fetch config")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text="\n".join(output_lines) + "\n\n❌ Failed to fetch config",
|
||||
)
|
||||
]
|
||||
|
||||
workflow_state["phases_completed"].append("fetch_config")
|
||||
else:
|
||||
@@ -1614,7 +1659,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
workflow_state["skill_name"] = config.get("name", "unknown")
|
||||
except Exception as e:
|
||||
return [
|
||||
TextContent(type="text", text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}",
|
||||
)
|
||||
]
|
||||
|
||||
# Call scrape_docs_tool (does NOT include enhancement)
|
||||
@@ -1638,7 +1686,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
# Check for success
|
||||
if "❌" in scrape_output:
|
||||
return [
|
||||
TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above",
|
||||
)
|
||||
]
|
||||
|
||||
workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}"
|
||||
@@ -1738,7 +1789,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
if not dry_run:
|
||||
if has_api_key:
|
||||
# Call upload_skill_tool
|
||||
upload_result = await upload_skill_tool({"skill_zip": workflow_state["zip_path"]})
|
||||
upload_result = await upload_skill_tool(
|
||||
{"skill_zip": workflow_state["zip_path"]}
|
||||
)
|
||||
|
||||
upload_output = upload_result[0].text
|
||||
output_lines.append(upload_output)
|
||||
@@ -1813,7 +1866,10 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
|
||||
from github import Github, GithubException
|
||||
except ImportError:
|
||||
return [
|
||||
TextContent(type="text", text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub",
|
||||
)
|
||||
]
|
||||
|
||||
config_path = args.get("config_path")
|
||||
@@ -1826,7 +1882,9 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
|
||||
if config_path:
|
||||
config_file = Path(config_path)
|
||||
if not config_file.exists():
|
||||
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
|
||||
]
|
||||
|
||||
with open(config_file) as f:
|
||||
config_data = json.load(f)
|
||||
@@ -1841,7 +1899,11 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
|
||||
return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")]
|
||||
|
||||
else:
|
||||
return [TextContent(type="text", text="❌ Error: Must provide either config_path or config_json")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text="❌ Error: Must provide either config_path or config_json"
|
||||
)
|
||||
]
|
||||
|
||||
# Use ConfigValidator for comprehensive validation
|
||||
if ConfigValidator is None:
|
||||
@@ -1871,14 +1933,20 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
|
||||
if not is_unified:
|
||||
# Legacy config - check base_url
|
||||
base_url = config_data.get("base_url", "")
|
||||
if base_url and not (base_url.startswith("http://") or base_url.startswith("https://")):
|
||||
raise ValueError(f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://")
|
||||
if base_url and not (
|
||||
base_url.startswith("http://") or base_url.startswith("https://")
|
||||
):
|
||||
raise ValueError(
|
||||
f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://"
|
||||
)
|
||||
else:
|
||||
# Unified config - check URLs in sources
|
||||
for idx, source in enumerate(config_data.get("sources", [])):
|
||||
if source.get("type") == "documentation":
|
||||
source_url = source.get("base_url", "")
|
||||
if source_url and not (source_url.startswith("http://") or source_url.startswith("https://")):
|
||||
if source_url and not (
|
||||
source_url.startswith("http://") or source_url.startswith("https://")
|
||||
):
|
||||
raise ValueError(
|
||||
f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://"
|
||||
)
|
||||
@@ -1920,7 +1988,10 @@ Please fix these issues and try again.
|
||||
# For legacy configs, use name-based detection
|
||||
name_lower = config_name.lower()
|
||||
category = "other"
|
||||
if any(x in name_lower for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]):
|
||||
if any(
|
||||
x in name_lower
|
||||
for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]
|
||||
):
|
||||
category = "web-frameworks"
|
||||
elif any(x in name_lower for x in ["godot", "unity", "unreal"]):
|
||||
category = "game-engines"
|
||||
@@ -1936,12 +2007,16 @@ Please fix these issues and try again.
|
||||
if "max_pages" not in config_data:
|
||||
warnings.append("⚠️ No max_pages set - will use default (100)")
|
||||
elif config_data.get("max_pages") in (None, -1):
|
||||
warnings.append("⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours")
|
||||
warnings.append(
|
||||
"⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours"
|
||||
)
|
||||
else:
|
||||
# Unified config warnings
|
||||
for src in config_data.get("sources", []):
|
||||
if src.get("type") == "documentation" and "max_pages" not in src:
|
||||
warnings.append("⚠️ No max_pages set for documentation source - will use default (100)")
|
||||
warnings.append(
|
||||
"⚠️ No max_pages set for documentation source - will use default (100)"
|
||||
)
|
||||
elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1):
|
||||
warnings.append("⚠️ Unlimited scraping enabled for documentation source")
|
||||
|
||||
@@ -1996,7 +2071,9 @@ Please fix these issues and try again.
|
||||
|
||||
# Create issue
|
||||
issue = repo.create_issue(
|
||||
title=f"[CONFIG] {config_name}", body=issue_body, labels=["config-submission", "needs-review"]
|
||||
title=f"[CONFIG] {config_name}",
|
||||
body=issue_body,
|
||||
labels=["config-submission", "needs-review"],
|
||||
)
|
||||
|
||||
result = f"""✅ Config submitted successfully!
|
||||
|
||||
@@ -64,7 +64,9 @@ class SourceManager:
|
||||
"""
|
||||
# Validate name
|
||||
if not name or not name.replace("-", "").replace("_", "").isalnum():
|
||||
raise ValueError(f"Invalid source name '{name}'. Must be alphanumeric with optional hyphens/underscores.")
|
||||
raise ValueError(
|
||||
f"Invalid source name '{name}'. Must be alphanumeric with optional hyphens/underscores."
|
||||
)
|
||||
|
||||
# Validate git_url
|
||||
if not git_url or not git_url.strip():
|
||||
@@ -136,7 +138,9 @@ class SourceManager:
|
||||
|
||||
# Not found - provide helpful error
|
||||
available = [s["name"] for s in registry["sources"]]
|
||||
raise KeyError(f"Source '{name}' not found. Available sources: {', '.join(available) if available else 'none'}")
|
||||
raise KeyError(
|
||||
f"Source '{name}' not found. Available sources: {', '.join(available) if available else 'none'}"
|
||||
)
|
||||
|
||||
def list_sources(self, enabled_only: bool = False) -> list[dict]:
|
||||
"""
|
||||
|
||||
@@ -169,7 +169,9 @@ async def validate_config(args: dict) -> list[TextContent]:
|
||||
try:
|
||||
# Check if file exists
|
||||
if not Path(config_path).exists():
|
||||
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
|
||||
]
|
||||
|
||||
# Try unified config validator first
|
||||
try:
|
||||
@@ -193,7 +195,9 @@ async def validate_config(args: dict) -> list[TextContent]:
|
||||
result += f" Max pages: {source.get('max_pages', 'Not set')}\n"
|
||||
elif source["type"] == "github":
|
||||
result += f" Repo: {source.get('repo', 'N/A')}\n"
|
||||
result += f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
|
||||
result += (
|
||||
f" Code depth: {source.get('code_analysis_depth', 'surface')}\n"
|
||||
)
|
||||
elif source["type"] == "pdf":
|
||||
result += f" Path: {source.get('path', 'N/A')}\n"
|
||||
|
||||
|
||||
@@ -252,14 +252,18 @@ async def upload_skill_tool(args: dict) -> list[TextContent]:
|
||||
except ValueError as e:
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai"
|
||||
type="text",
|
||||
text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai",
|
||||
)
|
||||
]
|
||||
|
||||
# Check if upload is supported
|
||||
if target == "markdown":
|
||||
return [
|
||||
TextContent(type="text", text="❌ Markdown export does not support upload. Use the packaged file manually.")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Markdown export does not support upload. Use the packaged file manually.",
|
||||
)
|
||||
]
|
||||
|
||||
# Run upload_skill.py with target parameter
|
||||
@@ -323,13 +327,18 @@ async def enhance_skill_tool(args: dict) -> list[TextContent]:
|
||||
except ValueError as e:
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai"
|
||||
type="text",
|
||||
text=f"❌ Invalid platform: {str(e)}\n\nSupported platforms: claude, gemini, openai",
|
||||
)
|
||||
]
|
||||
|
||||
# Check if enhancement is supported
|
||||
if not adaptor.supports_enhancement():
|
||||
return [TextContent(type="text", text=f"❌ {adaptor.PLATFORM_NAME} does not support AI enhancement")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=f"❌ {adaptor.PLATFORM_NAME} does not support AI enhancement"
|
||||
)
|
||||
]
|
||||
|
||||
output_lines = []
|
||||
output_lines.append(f"🚀 Enhancing skill with {adaptor.PLATFORM_NAME}")
|
||||
@@ -373,12 +382,19 @@ async def enhance_skill_tool(args: dict) -> list[TextContent]:
|
||||
|
||||
if not api_key:
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ {env_var} not set. Set API key or pass via api_key parameter.")
|
||||
TextContent(
|
||||
type="text",
|
||||
text=f"❌ {env_var} not set. Set API key or pass via api_key parameter.",
|
||||
)
|
||||
]
|
||||
|
||||
# Validate API key
|
||||
if not adaptor.validate_api_key(api_key):
|
||||
return [TextContent(type="text", text=f"❌ Invalid API key format for {adaptor.PLATFORM_NAME}")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=f"❌ Invalid API key format for {adaptor.PLATFORM_NAME}"
|
||||
)
|
||||
]
|
||||
|
||||
output_lines.append("Calling API for enhancement...")
|
||||
output_lines.append("")
|
||||
@@ -447,7 +463,8 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
except ValueError as e:
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=f"❌ Error: {str(e)}\n\nSupported platforms: claude, gemini, openai, markdown"
|
||||
type="text",
|
||||
text=f"❌ Error: {str(e)}\n\nSupported platforms: claude, gemini, openai, markdown",
|
||||
)
|
||||
]
|
||||
|
||||
@@ -498,7 +515,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
|
||||
if not dry_run:
|
||||
# Call fetch_config_tool directly
|
||||
fetch_result = await fetch_config_tool({"config_name": config_name, "destination": destination})
|
||||
fetch_result = await fetch_config_tool(
|
||||
{"config_name": config_name, "destination": destination}
|
||||
)
|
||||
|
||||
# Parse result to extract config path
|
||||
fetch_output = fetch_result[0].text
|
||||
@@ -512,7 +531,12 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
workflow_state["config_path"] = match.group(1).strip()
|
||||
output_lines.append(f"✅ Config fetched: {workflow_state['config_path']}")
|
||||
else:
|
||||
return [TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Failed to fetch config")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text="\n".join(output_lines) + "\n\n❌ Failed to fetch config",
|
||||
)
|
||||
]
|
||||
|
||||
workflow_state["phases_completed"].append("fetch_config")
|
||||
else:
|
||||
@@ -537,7 +561,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
workflow_state["skill_name"] = config.get("name", "unknown")
|
||||
except Exception as e:
|
||||
return [
|
||||
TextContent(type="text", text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="\n".join(output_lines) + f"\n\n❌ Failed to read config: {str(e)}",
|
||||
)
|
||||
]
|
||||
|
||||
# Call scrape_docs_tool (does NOT include enhancement)
|
||||
@@ -561,7 +588,10 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
# Check for success
|
||||
if "❌" in scrape_output:
|
||||
return [
|
||||
TextContent(type="text", text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="\n".join(output_lines) + "\n\n❌ Scraping failed - see error above",
|
||||
)
|
||||
]
|
||||
|
||||
workflow_state["skill_dir"] = f"{destination}/{workflow_state['skill_name']}"
|
||||
@@ -641,9 +671,13 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
else:
|
||||
# Fallback: construct package path based on platform
|
||||
if target == "gemini":
|
||||
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}-gemini.tar.gz"
|
||||
workflow_state["zip_path"] = (
|
||||
f"{destination}/{workflow_state['skill_name']}-gemini.tar.gz"
|
||||
)
|
||||
elif target == "openai":
|
||||
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}-openai.zip"
|
||||
workflow_state["zip_path"] = (
|
||||
f"{destination}/{workflow_state['skill_name']}-openai.zip"
|
||||
)
|
||||
else:
|
||||
workflow_state["zip_path"] = f"{destination}/{workflow_state['skill_name']}.zip"
|
||||
|
||||
@@ -660,7 +694,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
pkg_ext = "zip"
|
||||
pkg_file = f"{destination}/{workflow_state['skill_name']}.zip"
|
||||
|
||||
output_lines.append(f" [DRY RUN] Would package to {pkg_ext} file for {adaptor.PLATFORM_NAME}")
|
||||
output_lines.append(
|
||||
f" [DRY RUN] Would package to {pkg_ext} file for {adaptor.PLATFORM_NAME}"
|
||||
)
|
||||
workflow_state["zip_path"] = pkg_file
|
||||
|
||||
output_lines.append("")
|
||||
@@ -725,7 +761,9 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
output_lines.append(" (No API key needed - markdown is export only)")
|
||||
output_lines.append(f" Package created: {workflow_state['zip_path']}")
|
||||
else:
|
||||
output_lines.append(f" [DRY RUN] Would upload to {adaptor.PLATFORM_NAME} (if API key set)")
|
||||
output_lines.append(
|
||||
f" [DRY RUN] Would upload to {adaptor.PLATFORM_NAME} (if API key set)"
|
||||
)
|
||||
|
||||
output_lines.append("")
|
||||
|
||||
@@ -757,12 +795,16 @@ async def install_skill_tool(args: dict) -> list[TextContent]:
|
||||
output_lines.append(" Go to https://aistudio.google.com/ to use it")
|
||||
elif target == "openai":
|
||||
output_lines.append("🎉 Your assistant is now available in OpenAI!")
|
||||
output_lines.append(" Go to https://platform.openai.com/assistants/ to use it")
|
||||
output_lines.append(
|
||||
" Go to https://platform.openai.com/assistants/ to use it"
|
||||
)
|
||||
elif auto_upload:
|
||||
output_lines.append("📝 Manual upload required (see instructions above)")
|
||||
else:
|
||||
output_lines.append("📤 To upload:")
|
||||
output_lines.append(f" skill-seekers upload {workflow_state['zip_path']} --target {target}")
|
||||
output_lines.append(
|
||||
f" skill-seekers upload {workflow_state['zip_path']} --target {target}"
|
||||
)
|
||||
else:
|
||||
output_lines.append("This was a dry run. No actions were taken.")
|
||||
output_lines.append("")
|
||||
|
||||
@@ -140,7 +140,13 @@ async def estimate_pages_tool(args: dict) -> list[TextContent]:
|
||||
timeout = max(300, max_discovery // 2) # Minimum 5 minutes
|
||||
|
||||
# Run estimate_pages.py
|
||||
cmd = [sys.executable, str(CLI_DIR / "estimate_pages.py"), config_path, "--max-discovery", str(max_discovery)]
|
||||
cmd = [
|
||||
sys.executable,
|
||||
str(CLI_DIR / "estimate_pages.py"),
|
||||
config_path,
|
||||
"--max-discovery",
|
||||
str(max_discovery),
|
||||
]
|
||||
|
||||
progress_msg = "🔄 Estimating page count...\n"
|
||||
progress_msg += f"⏱️ Maximum time: {timeout // 60} minutes\n\n"
|
||||
@@ -328,7 +334,11 @@ async def scrape_pdf_tool(args: dict) -> list[TextContent]:
|
||||
cmd.extend(["--from-json", from_json])
|
||||
|
||||
else:
|
||||
return [TextContent(type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text="❌ Error: Must specify --config, --pdf + --name, or --from-json"
|
||||
)
|
||||
]
|
||||
|
||||
# Run pdf_scraper.py with streaming (can take a while)
|
||||
timeout = 600 # 10 minutes for PDF extraction
|
||||
@@ -529,7 +539,11 @@ async def detect_patterns_tool(args: dict) -> list[TextContent]:
|
||||
directory = args.get("directory")
|
||||
|
||||
if not file_path and not directory:
|
||||
return [TextContent(type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter"
|
||||
)
|
||||
]
|
||||
|
||||
output = args.get("output", "")
|
||||
depth = args.get("depth", "deep")
|
||||
@@ -604,7 +618,11 @@ async def extract_test_examples_tool(args: dict) -> list[TextContent]:
|
||||
directory = args.get("directory")
|
||||
|
||||
if not file_path and not directory:
|
||||
return [TextContent(type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text="❌ Error: Must specify either 'file' or 'directory' parameter"
|
||||
)
|
||||
]
|
||||
|
||||
language = args.get("language", "")
|
||||
min_confidence = args.get("min_confidence", 0.5)
|
||||
@@ -688,7 +706,12 @@ async def build_how_to_guides_tool(args: dict) -> list[TextContent]:
|
||||
"""
|
||||
input_file = args.get("input")
|
||||
if not input_file:
|
||||
return [TextContent(type="text", text="❌ Error: input parameter is required (path to test_examples.json)")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: input parameter is required (path to test_examples.json)",
|
||||
)
|
||||
]
|
||||
|
||||
output = args.get("output", "output/codebase/tutorials")
|
||||
group_by = args.get("group_by", "ai-tutorial-group")
|
||||
|
||||
@@ -76,7 +76,12 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
|
||||
# MODE 1: Named Source (highest priority)
|
||||
if source_name:
|
||||
if not config_name:
|
||||
return [TextContent(type="text", text="❌ Error: config_name is required when using source parameter")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: config_name is required when using source parameter",
|
||||
)
|
||||
]
|
||||
|
||||
# Get source from registry
|
||||
source_manager = SourceManager()
|
||||
@@ -97,7 +102,11 @@ async def fetch_config_tool(args: dict) -> list[TextContent]:
|
||||
git_repo = GitConfigRepo()
|
||||
try:
|
||||
repo_path = git_repo.clone_or_pull(
|
||||
source_name=source_name, git_url=git_url, branch=branch, token=token, force_refresh=force_refresh
|
||||
source_name=source_name,
|
||||
git_url=git_url,
|
||||
branch=branch,
|
||||
token=token,
|
||||
force_refresh=force_refresh,
|
||||
)
|
||||
except Exception as e:
|
||||
return [TextContent(type="text", text=f"❌ Git error: {str(e)}")]
|
||||
@@ -139,7 +148,12 @@ Next steps:
|
||||
# MODE 2: Direct Git URL
|
||||
elif git_url:
|
||||
if not config_name:
|
||||
return [TextContent(type="text", text="❌ Error: config_name is required when using git_url parameter")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: config_name is required when using git_url parameter",
|
||||
)
|
||||
]
|
||||
|
||||
# Clone/pull repository
|
||||
git_repo = GitConfigRepo()
|
||||
@@ -237,7 +251,9 @@ Next steps:
|
||||
if tags:
|
||||
result += f" Tags: {tags}\n"
|
||||
|
||||
result += "\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
|
||||
result += (
|
||||
"\n💡 To download a config, use: fetch_config with config_name='<name>'\n"
|
||||
)
|
||||
result += f"📚 API Docs: {API_BASE_URL}/docs\n"
|
||||
|
||||
return [TextContent(type="text", text=result)]
|
||||
@@ -245,7 +261,10 @@ Next steps:
|
||||
# Download specific config
|
||||
if not config_name:
|
||||
return [
|
||||
TextContent(type="text", text="❌ Error: Please provide config_name or set list_available=true")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: Please provide config_name or set list_available=true",
|
||||
)
|
||||
]
|
||||
|
||||
# Get config details first
|
||||
@@ -305,11 +324,14 @@ Next steps:
|
||||
except httpx.HTTPError as e:
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later."
|
||||
type="text",
|
||||
text=f"❌ HTTP Error: {str(e)}\n\nCheck your internet connection or try again later.",
|
||||
)
|
||||
]
|
||||
except json.JSONDecodeError as e:
|
||||
return [TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")]
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ JSON Error: Invalid response from API: {str(e)}")
|
||||
]
|
||||
except Exception as e:
|
||||
return [TextContent(type="text", text=f"❌ Error: {str(e)}")]
|
||||
|
||||
@@ -335,7 +357,10 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
|
||||
from github import Github, GithubException
|
||||
except ImportError:
|
||||
return [
|
||||
TextContent(type="text", text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub")
|
||||
TextContent(
|
||||
type="text",
|
||||
text="❌ Error: PyGithub not installed.\n\nInstall with: pip install PyGithub",
|
||||
)
|
||||
]
|
||||
|
||||
# Import config validator
|
||||
@@ -359,7 +384,9 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
|
||||
if config_path:
|
||||
config_file = Path(config_path)
|
||||
if not config_file.exists():
|
||||
return [TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")]
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ Error: Config file not found: {config_path}")
|
||||
]
|
||||
|
||||
with open(config_file) as f:
|
||||
config_data = json.load(f)
|
||||
@@ -374,7 +401,11 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
|
||||
return [TextContent(type="text", text=f"❌ Error: Invalid JSON: {str(e)}")]
|
||||
|
||||
else:
|
||||
return [TextContent(type="text", text="❌ Error: Must provide either config_path or config_json")]
|
||||
return [
|
||||
TextContent(
|
||||
type="text", text="❌ Error: Must provide either config_path or config_json"
|
||||
)
|
||||
]
|
||||
|
||||
# Use ConfigValidator for comprehensive validation
|
||||
if ConfigValidator is None:
|
||||
@@ -404,14 +435,20 @@ async def submit_config_tool(args: dict) -> list[TextContent]:
|
||||
if not is_unified:
|
||||
# Legacy config - check base_url
|
||||
base_url = config_data.get("base_url", "")
|
||||
if base_url and not (base_url.startswith("http://") or base_url.startswith("https://")):
|
||||
raise ValueError(f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://")
|
||||
if base_url and not (
|
||||
base_url.startswith("http://") or base_url.startswith("https://")
|
||||
):
|
||||
raise ValueError(
|
||||
f"Invalid base_url format: '{base_url}'\nURLs must start with http:// or https://"
|
||||
)
|
||||
else:
|
||||
# Unified config - check URLs in sources
|
||||
for idx, source in enumerate(config_data.get("sources", [])):
|
||||
if source.get("type") == "documentation":
|
||||
source_url = source.get("base_url", "")
|
||||
if source_url and not (source_url.startswith("http://") or source_url.startswith("https://")):
|
||||
if source_url and not (
|
||||
source_url.startswith("http://") or source_url.startswith("https://")
|
||||
):
|
||||
raise ValueError(
|
||||
f"Source {idx} (documentation): Invalid base_url format: '{source_url}'\nURLs must start with http:// or https://"
|
||||
)
|
||||
@@ -453,7 +490,10 @@ Please fix these issues and try again.
|
||||
# For legacy configs, use name-based detection
|
||||
name_lower = config_name.lower()
|
||||
category = "other"
|
||||
if any(x in name_lower for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]):
|
||||
if any(
|
||||
x in name_lower
|
||||
for x in ["react", "vue", "django", "laravel", "fastapi", "astro", "hono"]
|
||||
):
|
||||
category = "web-frameworks"
|
||||
elif any(x in name_lower for x in ["godot", "unity", "unreal"]):
|
||||
category = "game-engines"
|
||||
@@ -469,12 +509,16 @@ Please fix these issues and try again.
|
||||
if "max_pages" not in config_data:
|
||||
warnings.append("⚠️ No max_pages set - will use default (100)")
|
||||
elif config_data.get("max_pages") in (None, -1):
|
||||
warnings.append("⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours")
|
||||
warnings.append(
|
||||
"⚠️ Unlimited scraping enabled - may scrape thousands of pages and take hours"
|
||||
)
|
||||
else:
|
||||
# Unified config warnings
|
||||
for src in config_data.get("sources", []):
|
||||
if src.get("type") == "documentation" and "max_pages" not in src:
|
||||
warnings.append("⚠️ No max_pages set for documentation source - will use default (100)")
|
||||
warnings.append(
|
||||
"⚠️ No max_pages set for documentation source - will use default (100)"
|
||||
)
|
||||
elif src.get("type") == "documentation" and src.get("max_pages") in (None, -1):
|
||||
warnings.append("⚠️ Unlimited scraping enabled for documentation source")
|
||||
|
||||
@@ -529,7 +573,9 @@ Please fix these issues and try again.
|
||||
|
||||
# Create issue
|
||||
issue = repo.create_issue(
|
||||
title=f"[CONFIG] {config_name}", body=issue_body, labels=["config-submission", "needs-review"]
|
||||
title=f"[CONFIG] {config_name}",
|
||||
body=issue_body,
|
||||
labels=["config-submission", "needs-review"],
|
||||
)
|
||||
|
||||
result = f"""✅ Config submitted successfully!
|
||||
|
||||
@@ -183,7 +183,9 @@ async def generate_router(args: dict) -> list[TextContent]:
|
||||
config_files = glob.glob(config_pattern)
|
||||
|
||||
if not config_files:
|
||||
return [TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")]
|
||||
return [
|
||||
TextContent(type="text", text=f"❌ No config files match pattern: {config_pattern}")
|
||||
]
|
||||
|
||||
# Run generate_router.py
|
||||
cmd = [
|
||||
|
||||
Reference in New Issue
Block a user