style: Run black formatter on 16 files
Applied black formatting to files modified in linting fixes:

Source files (8):
- config_extractor.py
- doc_scraper.py
- how_to_guide_builder.py
- llms_txt_downloader.py
- llms_txt_parser.py
- pattern_recognizer.py
- test_example_extractor.py
- unified_codebase_analyzer.py

Test files (8):
- test_architecture_scenarios.py
- test_async_scraping.py
- test_github_scraper.py
- test_guide_enhancer.py
- test_install_agent.py
- test_issue_219_e2e.py
- test_llms_txt_downloader.py
- test_skip_llms_txt.py

All formatting issues resolved.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -148,7 +148,9 @@ def infer_description_from_docs(
|
||||
|
||||
|
||||
class DocToSkillConverter:
|
||||
def __init__(self, config: dict[str, Any], dry_run: bool = False, resume: bool = False) -> None:
|
||||
def __init__(
|
||||
self, config: dict[str, Any], dry_run: bool = False, resume: bool = False
|
||||
) -> None:
|
||||
self.config = config
|
||||
self.name = config["name"]
|
||||
self.base_url = config["base_url"]
|
||||
@@ -163,7 +165,9 @@ class DocToSkillConverter:
|
||||
# Checkpoint config
|
||||
checkpoint_config = config.get("checkpoint", {})
|
||||
self.checkpoint_enabled = checkpoint_config.get("enabled", False)
|
||||
self.checkpoint_interval = checkpoint_config.get("interval", DEFAULT_CHECKPOINT_INTERVAL)
|
||||
self.checkpoint_interval = checkpoint_config.get(
|
||||
"interval", DEFAULT_CHECKPOINT_INTERVAL
|
||||
)
|
||||
|
||||
# llms.txt detection state
|
||||
skip_llms_txt_value = config.get("skip_llms_txt", False)
|
||||
@@ -318,7 +322,9 @@ class DocToSkillConverter:
|
||||
for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
|
||||
text = self.clean_text(h.get_text())
|
||||
if text:
|
||||
page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")})
|
||||
page["headings"].append(
|
||||
{"level": h.name, "text": text, "id": h.get("id", "")}
|
||||
)
|
||||
|
||||
# Extract code with language detection
|
||||
code_selector = selectors.get("code_blocks", "pre code")
|
||||
@@ -385,7 +391,9 @@ class DocToSkillConverter:
|
||||
import re
|
||||
|
||||
# Detect if content is actually HTML (some .md URLs return HTML)
|
||||
if content.strip().startswith("<!DOCTYPE") or content.strip().startswith("<html"):
|
||||
if content.strip().startswith("<!DOCTYPE") or content.strip().startswith(
|
||||
"<html"
|
||||
):
|
||||
return self._extract_html_as_markdown(content, url)
|
||||
|
||||
page = {
|
||||
@@ -413,14 +421,20 @@ class DocToSkillConverter:
|
||||
level = len(match.group(1))
|
||||
text = match.group(2).strip()
|
||||
page["headings"].append(
|
||||
{"level": f"h{level}", "text": text, "id": text.lower().replace(" ", "-")}
|
||||
{
|
||||
"level": f"h{level}",
|
||||
"text": text,
|
||||
"id": text.lower().replace(" ", "-"),
|
||||
}
|
||||
)
|
||||
|
||||
# Extract code blocks with language
|
||||
code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
|
||||
for lang, code in code_blocks:
|
||||
if len(code.strip()) > 10:
|
||||
page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"})
|
||||
page["code_samples"].append(
|
||||
{"code": code.strip(), "language": lang or "unknown"}
|
||||
)
|
||||
|
||||
# Extract content (paragraphs)
|
||||
content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
|
||||
@@ -444,7 +458,11 @@ class DocToSkillConverter:
|
||||
# Strip anchor fragments
|
||||
full_url = full_url.split("#")[0]
|
||||
# Only include .md URLs to avoid client-side rendered HTML pages
|
||||
if ".md" in full_url and self.is_valid_url(full_url) and full_url not in page["links"]:
|
||||
if (
|
||||
".md" in full_url
|
||||
and self.is_valid_url(full_url)
|
||||
and full_url not in page["links"]
|
||||
):
|
||||
page["links"].append(full_url)
|
||||
|
||||
return page
|
||||
@@ -508,14 +526,18 @@ class DocToSkillConverter:
|
||||
for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
|
||||
text = self.clean_text(h.get_text())
|
||||
if text:
|
||||
page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")})
|
||||
page["headings"].append(
|
||||
{"level": h.name, "text": text, "id": h.get("id", "")}
|
||||
)
|
||||
|
||||
# Extract code blocks
|
||||
for code_elem in main.select("pre code, pre"):
|
||||
code = code_elem.get_text()
|
||||
if len(code.strip()) > 10:
|
||||
lang = self.detect_language(code_elem, code)
|
||||
page["code_samples"].append({"code": code.strip(), "language": lang})
|
||||
page["code_samples"].append(
|
||||
{"code": code.strip(), "language": lang}
|
||||
)
|
||||
|
||||
# Extract paragraphs
|
||||
paragraphs = []
|
||||
@@ -536,7 +558,9 @@ class DocToSkillConverter:
|
||||
|
||||
# Log low-confidence detections for debugging
|
||||
if confidence < 0.5:
|
||||
logger.debug(f"Low confidence language detection: {lang} ({confidence:.2f})")
|
||||
logger.debug(
|
||||
f"Low confidence language detection: {lang} ({confidence:.2f})"
|
||||
)
|
||||
|
||||
return lang # Return string for backward compatibility
|
||||
|
||||
@@ -549,7 +573,10 @@ class DocToSkillConverter:
|
||||
# Look for "Example:" or "Pattern:" sections
|
||||
for elem in main.find_all(["p", "div"]):
|
||||
text = elem.get_text().lower()
|
||||
if any(word in text for word in ["example:", "pattern:", "usage:", "typical use"]):
|
||||
if any(
|
||||
word in text
|
||||
for word in ["example:", "pattern:", "usage:", "typical use"]
|
||||
):
|
||||
# Get the code that follows
|
||||
next_code = elem.find_next(["pre", "code"])
|
||||
if next_code:
|
||||
@@ -571,7 +598,9 @@ class DocToSkillConverter:
|
||||
"""Save page data (skip pages with empty content)"""
|
||||
# Skip pages with empty or very short content
|
||||
if not page.get("content") or len(page.get("content", "")) < 50:
|
||||
logger.debug("Skipping page with empty/short content: %s", page.get("url", "unknown"))
|
||||
logger.debug(
|
||||
"Skipping page with empty/short content: %s", page.get("url", "unknown")
|
||||
)
|
||||
return
|
||||
|
||||
url_hash = hashlib.md5(page["url"].encode()).hexdigest()[:10]
|
||||
@@ -619,7 +648,10 @@ class DocToSkillConverter:
|
||||
|
||||
# Add new URLs
|
||||
for link in page["links"]:
|
||||
if link not in self.visited_urls and link not in self.pending_urls:
|
||||
if (
|
||||
link not in self.visited_urls
|
||||
and link not in self.pending_urls
|
||||
):
|
||||
self.pending_urls.append(link)
|
||||
else:
|
||||
# Single-threaded mode (no lock needed)
|
||||
@@ -640,7 +672,9 @@ class DocToSkillConverter:
|
||||
except Exception as e:
|
||||
if self.workers > 1:
|
||||
with self.lock:
|
||||
logger.error(" ✗ Error scraping %s: %s: %s", url, type(e).__name__, e)
|
||||
logger.error(
|
||||
" ✗ Error scraping %s: %s: %s", url, type(e).__name__, e
|
||||
)
|
||||
else:
|
||||
logger.error(" ✗ Error scraping page: %s: %s", type(e).__name__, e)
|
||||
logger.error(" URL: %s", url)
|
||||
@@ -715,7 +749,8 @@ class DocToSkillConverter:
|
||||
md_urls.append(md_url)
|
||||
|
||||
logger.info(
|
||||
" ✓ Converted %d URLs to .md format (will validate during crawl)", len(md_urls)
|
||||
" ✓ Converted %d URLs to .md format (will validate during crawl)",
|
||||
len(md_urls),
|
||||
)
|
||||
return md_urls
|
||||
|
||||
@@ -757,7 +792,9 @@ class DocToSkillConverter:
|
||||
# Check for explicit config URL first
|
||||
explicit_url = self.config.get("llms_txt_url")
|
||||
if explicit_url:
|
||||
logger.info("\n📌 Using explicit llms_txt_url from config: %s", explicit_url)
|
||||
logger.info(
|
||||
"\n📌 Using explicit llms_txt_url from config: %s", explicit_url
|
||||
)
|
||||
|
||||
# Download explicit file first
|
||||
downloader = LlmsTxtDownloader(explicit_url)
|
||||
@@ -779,7 +816,8 @@ class DocToSkillConverter:
|
||||
|
||||
if variants:
|
||||
logger.info(
|
||||
"\n🔍 Found %d total variant(s), downloading remaining...", len(variants)
|
||||
"\n🔍 Found %d total variant(s), downloading remaining...",
|
||||
len(variants),
|
||||
)
|
||||
for variant_info in variants:
|
||||
url = variant_info["url"]
|
||||
@@ -800,7 +838,11 @@ class DocToSkillConverter:
|
||||
)
|
||||
with open(extra_filepath, "w", encoding="utf-8") as f:
|
||||
f.write(extra_content)
|
||||
logger.info(" ✓ %s (%d chars)", extra_filename, len(extra_content))
|
||||
logger.info(
|
||||
" ✓ %s (%d chars)",
|
||||
extra_filename,
|
||||
len(extra_content),
|
||||
)
|
||||
|
||||
# Parse explicit file for skill building
|
||||
parser = LlmsTxtParser(content, self.base_url)
|
||||
@@ -822,7 +864,8 @@ class DocToSkillConverter:
|
||||
self.pending_urls.append(url)
|
||||
|
||||
logger.info(
|
||||
" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls)
|
||||
" 📋 %d URLs added to crawl queue after filtering",
|
||||
len(self.pending_urls),
|
||||
)
|
||||
|
||||
# Return False to trigger HTML scraping with the populated pending_urls
|
||||
@@ -872,7 +915,9 @@ class DocToSkillConverter:
|
||||
logger.info(" ✓ %s (%d chars)", filename, len(content))
|
||||
|
||||
if not downloaded:
|
||||
logger.warning("⚠️ Failed to download any variants, falling back to HTML scraping")
|
||||
logger.warning(
|
||||
"⚠️ Failed to download any variants, falling back to HTML scraping"
|
||||
)
|
||||
return False
|
||||
|
||||
# Save ALL variants to references/
|
||||
@@ -906,7 +951,10 @@ class DocToSkillConverter:
|
||||
if self.is_valid_url(url) and url not in self.visited_urls:
|
||||
self.pending_urls.append(url)
|
||||
|
||||
logger.info(" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls))
|
||||
logger.info(
|
||||
" 📋 %d URLs added to crawl queue after filtering",
|
||||
len(self.pending_urls),
|
||||
)
|
||||
|
||||
# Return False to trigger HTML scraping with the populated pending_urls
|
||||
self.llms_txt_detected = True
|
||||
@@ -947,7 +995,8 @@ class DocToSkillConverter:
|
||||
llms_result = self._try_llms_txt()
|
||||
if llms_result:
|
||||
logger.info(
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping",
|
||||
self.llms_txt_variant,
|
||||
)
|
||||
self.save_summary()
|
||||
return
|
||||
@@ -983,7 +1032,9 @@ class DocToSkillConverter:
|
||||
|
||||
# Single-threaded mode (original sequential logic)
|
||||
if self.workers <= 1:
|
||||
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
|
||||
while self.pending_urls and (
|
||||
unlimited or len(self.visited_urls) < preview_limit
|
||||
):
|
||||
url = self.pending_urls.popleft()
|
||||
|
||||
if url in self.visited_urls:
|
||||
@@ -995,7 +1046,9 @@ class DocToSkillConverter:
|
||||
# Just show what would be scraped
|
||||
logger.info(" [Preview] %s", url)
|
||||
try:
|
||||
headers = {"User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)"}
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)"
|
||||
}
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
|
||||
@@ -1007,11 +1060,16 @@ class DocToSkillConverter:
|
||||
if main:
|
||||
for link in main.find_all("a", href=True):
|
||||
href = urljoin(url, link["href"])
|
||||
if self.is_valid_url(href) and href not in self.visited_urls:
|
||||
if (
|
||||
self.is_valid_url(href)
|
||||
and href not in self.visited_urls
|
||||
):
|
||||
self.pending_urls.append(href)
|
||||
except Exception as e:
|
||||
# Failed to extract links in fast mode, continue anyway
|
||||
logger.warning("⚠️ Warning: Could not extract links from %s: %s", url, e)
|
||||
logger.warning(
|
||||
"⚠️ Warning: Could not extract links from %s: %s", url, e
|
||||
)
|
||||
else:
|
||||
self.scrape_page(url)
|
||||
self.pages_scraped += 1
|
||||
@@ -1034,7 +1092,9 @@ class DocToSkillConverter:
|
||||
with ThreadPoolExecutor(max_workers=self.workers) as executor:
|
||||
futures = []
|
||||
|
||||
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
|
||||
while self.pending_urls and (
|
||||
unlimited or len(self.visited_urls) < preview_limit
|
||||
):
|
||||
# Get next batch of URLs (thread-safe)
|
||||
batch = []
|
||||
batch_size = min(self.workers * 2, len(self.pending_urls))
|
||||
@@ -1092,9 +1152,14 @@ class DocToSkillConverter:
|
||||
self.pages_scraped += 1
|
||||
|
||||
if self.dry_run:
|
||||
logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
|
||||
logger.info(
|
||||
"\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls)
|
||||
)
|
||||
if len(self.visited_urls) >= preview_limit:
|
||||
logger.info(" (showing first %d, actual scraping may find more)", preview_limit)
|
||||
logger.info(
|
||||
" (showing first %d, actual scraping may find more)",
|
||||
preview_limit,
|
||||
)
|
||||
logger.info("\n💡 To actually scrape, run without --dry-run")
|
||||
else:
|
||||
logger.info("\n✅ Scraped %d pages", len(self.visited_urls))
|
||||
@@ -1114,7 +1179,8 @@ class DocToSkillConverter:
|
||||
llms_result = self._try_llms_txt()
|
||||
if llms_result:
|
||||
logger.info(
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping",
|
||||
self.llms_txt_variant,
|
||||
)
|
||||
self.save_summary()
|
||||
return
|
||||
@@ -1155,7 +1221,9 @@ class DocToSkillConverter:
|
||||
) as client:
|
||||
tasks = []
|
||||
|
||||
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
|
||||
while self.pending_urls and (
|
||||
unlimited or len(self.visited_urls) < preview_limit
|
||||
):
|
||||
# Get next batch of URLs
|
||||
batch = []
|
||||
batch_size = min(self.workers * 2, len(self.pending_urls))
|
||||
@@ -1191,7 +1259,11 @@ class DocToSkillConverter:
|
||||
logger.info(" [%d pages scraped]", self.pages_scraped)
|
||||
|
||||
# Checkpoint saving
|
||||
if not self.dry_run and self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0:
|
||||
if (
|
||||
not self.dry_run
|
||||
and self.checkpoint_enabled
|
||||
and self.pages_scraped % self.checkpoint_interval == 0
|
||||
):
|
||||
self.save_checkpoint()
|
||||
|
||||
# Wait for any remaining tasks
|
||||
@@ -1199,10 +1271,13 @@ class DocToSkillConverter:
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
if self.dry_run:
|
||||
logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
|
||||
logger.info(
|
||||
"\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls)
|
||||
)
|
||||
if len(self.visited_urls) >= preview_limit:
|
||||
logger.info(
|
||||
" (showing first %d, actual scraping may find more)", int(preview_limit)
|
||||
" (showing first %d, actual scraping may find more)",
|
||||
int(preview_limit),
|
||||
)
|
||||
logger.info("\n💡 To actually scrape, run without --dry-run")
|
||||
else:
|
||||
@@ -1237,7 +1312,10 @@ class DocToSkillConverter:
|
||||
pages.append(json.load(f))
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"⚠️ Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e
|
||||
"⚠️ Error loading scraped data file %s: %s: %s",
|
||||
json_file,
|
||||
type(e).__name__,
|
||||
e,
|
||||
)
|
||||
logger.error(
|
||||
" Suggestion: File may be corrupted, consider re-scraping with --fresh"
|
||||
@@ -1245,7 +1323,9 @@ class DocToSkillConverter:
|
||||
|
||||
return pages
|
||||
|
||||
def smart_categorize(self, pages: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
|
||||
def smart_categorize(
|
||||
self, pages: list[dict[str, Any]]
|
||||
) -> dict[str, list[dict[str, Any]]]:
|
||||
"""Improved categorization with better pattern matching"""
|
||||
category_defs = self.config.get("categories", {})
|
||||
|
||||
@@ -1297,14 +1377,18 @@ class DocToSkillConverter:
|
||||
for page in pages:
|
||||
path = urlparse(page["url"]).path
|
||||
segments = [
|
||||
s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]
|
||||
s
|
||||
for s in path.split("/")
|
||||
if s and s not in ["en", "stable", "latest", "docs"]
|
||||
]
|
||||
|
||||
for seg in segments:
|
||||
url_segments[seg] += 1
|
||||
|
||||
# Top segments become categories
|
||||
top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[:8]
|
||||
top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[
|
||||
:8
|
||||
]
|
||||
|
||||
categories = {}
|
||||
for seg, count in top_segments:
|
||||
@@ -1324,7 +1408,9 @@ class DocToSkillConverter:
|
||||
|
||||
return categories
|
||||
|
||||
def generate_quick_reference(self, pages: list[dict[str, Any]]) -> list[dict[str, str]]:
|
||||
def generate_quick_reference(
|
||||
self, pages: list[dict[str, Any]]
|
||||
) -> list[dict[str, str]]:
|
||||
"""Generate quick reference from common patterns (NEW FEATURE)"""
|
||||
quick_ref = []
|
||||
|
||||
@@ -1393,7 +1479,9 @@ class DocToSkillConverter:
|
||||
logger.info(" ✓ %s.md (%d pages)", category, len(pages))
|
||||
|
||||
def create_enhanced_skill_md(
|
||||
self, categories: dict[str, list[dict[str, Any]]], quick_ref: list[dict[str, str]]
|
||||
self,
|
||||
categories: dict[str, list[dict[str, Any]]],
|
||||
quick_ref: list[dict[str, str]],
|
||||
) -> None:
|
||||
"""Create SKILL.md with actual examples (IMPROVED)"""
|
||||
# Try to infer description if not in config
|
||||
@@ -1404,7 +1492,9 @@ class DocToSkillConverter:
|
||||
if pages:
|
||||
first_page_html = pages[0].get("raw_html", "")
|
||||
break
|
||||
description = infer_description_from_docs(self.base_url, first_page_html, self.name)
|
||||
description = infer_description_from_docs(
|
||||
self.base_url, first_page_html, self.name
|
||||
)
|
||||
else:
|
||||
description = self.config["description"]
|
||||
|
||||
@@ -1412,7 +1502,9 @@ class DocToSkillConverter:
|
||||
example_codes = []
|
||||
for pages in categories.values():
|
||||
for page in pages[:3]: # First 3 pages per category
|
||||
for sample in page.get("code_samples", [])[:2]: # First 2 samples per page
|
||||
for sample in page.get("code_samples", [])[
|
||||
:2
|
||||
]: # First 2 samples per page
|
||||
code = sample.get("code", sample if isinstance(sample, str) else "")
|
||||
lang = sample.get("language", "unknown")
|
||||
if len(code) < 200 and lang != "unknown":
|
||||
@@ -1462,7 +1554,9 @@ This skill should be triggered when:
|
||||
content += pattern.get("code", "")[:300]
|
||||
content += "\n```\n\n"
|
||||
else:
|
||||
content += "*Quick reference patterns will be added as you use the skill.*\n\n"
|
||||
content += (
|
||||
"*Quick reference patterns will be added as you use the skill.*\n\n"
|
||||
)
|
||||
|
||||
# Add example codes from docs
|
||||
if example_codes:
|
||||
@@ -1477,7 +1571,9 @@ This skill includes comprehensive documentation in `references/`:
|
||||
"""
|
||||
|
||||
for cat in sorted(categories.keys()):
|
||||
content += f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n"
|
||||
content += (
|
||||
f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n"
|
||||
)
|
||||
|
||||
content += """
|
||||
Use `view` to read specific reference files when detailed information is needed.
|
||||
@@ -1625,7 +1721,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
|
||||
)
|
||||
|
||||
# Validate base_url
|
||||
if "base_url" in config and not config["base_url"].startswith(("http://", "https://")):
|
||||
if "base_url" in config and not config["base_url"].startswith(
|
||||
("http://", "https://")
|
||||
):
|
||||
errors.append(
|
||||
f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)"
|
||||
)
|
||||
@@ -1648,7 +1746,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
|
||||
errors.append("'url_patterns' must be a dictionary")
|
||||
else:
|
||||
for key in ["include", "exclude"]:
|
||||
if key in config["url_patterns"] and not isinstance(config["url_patterns"][key], list):
|
||||
if key in config["url_patterns"] and not isinstance(
|
||||
config["url_patterns"][key], list
|
||||
):
|
||||
errors.append(f"'url_patterns.{key}' must be a list")
|
||||
|
||||
# Validate categories
|
||||
@@ -1740,12 +1840,18 @@ def load_config(config_path: str) -> dict[str, Any]:
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error("❌ Error: Invalid JSON in config file: %s", config_path)
|
||||
logger.error(" Details: %s", e)
|
||||
logger.error(" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno)
|
||||
logger.error(
|
||||
" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno
|
||||
)
|
||||
sys.exit(1)
|
||||
except FileNotFoundError:
|
||||
logger.error("❌ Error: Config file not found: %s", config_path)
|
||||
logger.error(" Suggestion: Create a config file or use an existing one from configs/")
|
||||
logger.error(" Available configs: react.json, vue.json, django.json, godot.json")
|
||||
logger.error(
|
||||
" Suggestion: Create a config file or use an existing one from configs/"
|
||||
)
|
||||
logger.error(
|
||||
" Available configs: react.json, vue.json, django.json, godot.json"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# Validate config
|
||||
@@ -1763,7 +1869,9 @@ def load_config(config_path: str) -> dict[str, Any]:
|
||||
logger.error("❌ Configuration validation errors in %s:", config_path)
|
||||
for error in errors:
|
||||
logger.error(" - %s", error)
|
||||
logger.error("\n Suggestion: Fix the above errors or check configs/ for working examples")
|
||||
logger.error(
|
||||
"\n Suggestion: Fix the above errors or check configs/ for working examples"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
return config
|
||||
@@ -1870,10 +1978,16 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--interactive", "-i", action="store_true", help="Interactive configuration mode"
|
||||
"--interactive",
|
||||
"-i",
|
||||
action="store_true",
|
||||
help="Interactive configuration mode",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)"
|
||||
"--config",
|
||||
"-c",
|
||||
type=str,
|
||||
help="Load configuration from file (e.g., configs/godot.json)",
|
||||
)
|
||||
parser.add_argument("--name", type=str, help="Skill name")
|
||||
parser.add_argument("--url", type=str, help="Base documentation URL")
|
||||
@@ -1902,14 +2016,18 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
help="Open terminal window for enhancement (use with --enhance-local)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
|
||||
"--api-key",
|
||||
type=str,
|
||||
help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
action="store_true",
|
||||
help="Resume from last checkpoint (for interrupted scrapes)",
|
||||
)
|
||||
parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh")
|
||||
parser.add_argument(
|
||||
"--fresh", action="store_true", help="Clear checkpoint and start fresh"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rate-limit",
|
||||
"-r",
|
||||
@@ -1936,10 +2054,16 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
help="Disable rate limiting completely (same as --rate-limit 0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)"
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="store_true",
|
||||
help="Enable verbose output (DEBUG level logging)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)"
|
||||
"--quiet",
|
||||
"-q",
|
||||
action="store_true",
|
||||
help="Minimize output (WARNING level logging only)",
|
||||
)
|
||||
|
||||
return parser
|
||||
@@ -2002,11 +2126,15 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
|
||||
if args.workers:
|
||||
# Validate workers count
|
||||
if args.workers < 1:
|
||||
logger.error("❌ Error: --workers must be at least 1 (got %d)", args.workers)
|
||||
logger.error(
|
||||
"❌ Error: --workers must be at least 1 (got %d)", args.workers
|
||||
)
|
||||
logger.error(" Suggestion: Use --workers 1 (default) or omit the flag")
|
||||
sys.exit(1)
|
||||
if args.workers > 10:
|
||||
logger.warning("⚠️ Warning: --workers capped at 10 (requested %d)", args.workers)
|
||||
logger.warning(
|
||||
"⚠️ Warning: --workers capped at 10 (requested %d)", args.workers
|
||||
)
|
||||
args.workers = 10
|
||||
config["workers"] = args.workers
|
||||
if args.workers > 1:
|
||||
@@ -2160,7 +2288,11 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non
|
||||
logger.info("=" * 60 + "\n")
|
||||
|
||||
try:
|
||||
enhance_cmd = ["python3", "cli/enhance_skill.py", f"output/{config['name']}/"]
|
||||
enhance_cmd = [
|
||||
"python3",
|
||||
"cli/enhance_skill.py",
|
||||
f"output/{config['name']}/",
|
||||
]
|
||||
if args.api_key:
|
||||
enhance_cmd.extend(["--api-key", args.api_key])
|
||||
|
||||
@@ -2204,9 +2336,14 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non
|
||||
# Suggest enhancement if not done
|
||||
if not args.enhance and not args.enhance_local:
|
||||
logger.info("\n💡 Optional: Enhance SKILL.md with Claude:")
|
||||
logger.info(" Local (recommended): skill-seekers-enhance output/%s/", config["name"])
|
||||
logger.info(
|
||||
" Local (recommended): skill-seekers-enhance output/%s/", config["name"]
|
||||
)
|
||||
logger.info(" or re-run with: --enhance-local")
|
||||
logger.info(" API-based: skill-seekers-enhance-api output/%s/", config["name"])
|
||||
logger.info(
|
||||
" API-based: skill-seekers-enhance-api output/%s/",
|
||||
config["name"],
|
||||
)
|
||||
logger.info(" or re-run with: --enhance")
|
||||
logger.info(
|
||||
"\n💡 Tip: Use --interactive-enhancement with --enhance-local to open terminal window"
|
||||
|
||||
Reference in New Issue
Block a user