style: Run black formatter on 16 files
Applied black formatting to files modified in linting fixes:

Source files (8):
- config_extractor.py
- doc_scraper.py
- how_to_guide_builder.py
- llms_txt_downloader.py
- llms_txt_parser.py
- pattern_recognizer.py
- test_example_extractor.py
- unified_codebase_analyzer.py

Test files (8):
- test_architecture_scenarios.py
- test_async_scraping.py
- test_github_scraper.py
- test_guide_enhancer.py
- test_install_agent.py
- test_issue_219_e2e.py
- test_llms_txt_downloader.py
- test_skip_llms_txt.py

All formatting issues resolved.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -65,7 +65,15 @@ class ConfigFile:
|
||||
file_path: str
|
||||
relative_path: str
|
||||
config_type: Literal[
|
||||
"json", "yaml", "toml", "env", "ini", "python", "javascript", "dockerfile", "docker-compose"
|
||||
"json",
|
||||
"yaml",
|
||||
"toml",
|
||||
"env",
|
||||
"ini",
|
||||
"python",
|
||||
"javascript",
|
||||
"dockerfile",
|
||||
"docker-compose",
|
||||
]
|
||||
purpose: str # Inferred purpose: database, api, logging, etc.
|
||||
settings: list[ConfigSetting] = field(default_factory=list)
|
||||
@@ -81,7 +89,9 @@ class ConfigExtractionResult:
|
||||
config_files: list[ConfigFile] = field(default_factory=list)
|
||||
total_files: int = 0
|
||||
total_settings: int = 0
|
||||
detected_patterns: dict[str, list[str]] = field(default_factory=dict) # pattern -> files
|
||||
detected_patterns: dict[str, list[str]] = field(
|
||||
default_factory=dict
|
||||
) # pattern -> files
|
||||
errors: list[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
@@ -195,7 +205,12 @@ class ConfigFileDetector:
|
||||
},
|
||||
"javascript": {
|
||||
"patterns": ["*.config.js", "*.config.ts"],
|
||||
"names": ["config.js", "next.config.js", "vue.config.js", "webpack.config.js"],
|
||||
"names": [
|
||||
"config.js",
|
||||
"next.config.js",
|
||||
"vue.config.js",
|
||||
"webpack.config.js",
|
||||
],
|
||||
},
|
||||
"dockerfile": {
|
||||
"patterns": ["Dockerfile*"],
|
||||
@@ -226,7 +241,9 @@ class ConfigFileDetector:
|
||||
"*.egg-info",
|
||||
}
|
||||
|
||||
def find_config_files(self, directory: Path, max_files: int = 100) -> list[ConfigFile]:
|
||||
def find_config_files(
|
||||
self, directory: Path, max_files: int = 100
|
||||
) -> list[ConfigFile]:
|
||||
"""
|
||||
Find all configuration files in directory.
|
||||
|
||||
@@ -297,7 +314,10 @@ class ConfigFileDetector:
|
||||
filename = file_path.name.lower()
|
||||
|
||||
# Database configs
|
||||
if any(word in path_lower for word in ["database", "db", "postgres", "mysql", "mongo"]):
|
||||
if any(
|
||||
word in path_lower
|
||||
for word in ["database", "db", "postgres", "mysql", "mongo"]
|
||||
):
|
||||
return "database_configuration"
|
||||
|
||||
# API configs
|
||||
@@ -313,7 +333,9 @@ class ConfigFileDetector:
|
||||
return "docker_configuration"
|
||||
|
||||
# CI/CD configs
|
||||
if any(word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"]):
|
||||
if any(
|
||||
word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"]
|
||||
):
|
||||
return "ci_cd_configuration"
|
||||
|
||||
# Package configs
|
||||
@@ -325,7 +347,11 @@ class ConfigFileDetector:
|
||||
return "typescript_configuration"
|
||||
|
||||
# Framework configs
|
||||
if "next.config" in filename or "vue.config" in filename or "webpack.config" in filename:
|
||||
if (
|
||||
"next.config" in filename
|
||||
or "vue.config" in filename
|
||||
or "webpack.config" in filename
|
||||
):
|
||||
return "framework_configuration"
|
||||
|
||||
# Environment configs
|
||||
@@ -467,7 +493,12 @@ class ConfigParser:
|
||||
|
||||
for node in ast.walk(tree):
|
||||
# Get variable name and skip private variables
|
||||
if isinstance(node, ast.Assign) and len(node.targets) == 1 and isinstance(node.targets[0], ast.Name) and not node.targets[0].id.startswith("_"):
|
||||
if (
|
||||
isinstance(node, ast.Assign)
|
||||
and len(node.targets) == 1
|
||||
and isinstance(node.targets[0], ast.Name)
|
||||
and not node.targets[0].id.startswith("_")
|
||||
):
|
||||
key = node.targets[0].id
|
||||
|
||||
# Extract value
|
||||
@@ -500,7 +531,9 @@ class ConfigParser:
|
||||
for match in re.finditer(pattern, config_file.raw_content):
|
||||
if len(match.groups()) >= 2:
|
||||
key = match.group(1)
|
||||
value = match.group(3) if len(match.groups()) > 2 else match.group(2)
|
||||
value = (
|
||||
match.group(3) if len(match.groups()) > 2 else match.group(2)
|
||||
)
|
||||
|
||||
setting = ConfigSetting(
|
||||
key=key, value=value, value_type=self._infer_type(value)
|
||||
@@ -546,7 +579,9 @@ class ConfigParser:
|
||||
for key, value in data.items():
|
||||
if isinstance(value, dict):
|
||||
# Recurse into nested dicts
|
||||
self._extract_settings_from_dict(value, config_file, parent_path + [key])
|
||||
self._extract_settings_from_dict(
|
||||
value, config_file, parent_path + [key]
|
||||
)
|
||||
else:
|
||||
setting = ConfigSetting(
|
||||
key=".".join(parent_path + [key]) if parent_path else key,
|
||||
@@ -593,11 +628,26 @@ class ConfigPatternDetector:
|
||||
# Known configuration patterns
|
||||
KNOWN_PATTERNS = {
|
||||
"database_config": {
|
||||
"keys": ["host", "port", "database", "user", "username", "password", "db_name"],
|
||||
"keys": [
|
||||
"host",
|
||||
"port",
|
||||
"database",
|
||||
"user",
|
||||
"username",
|
||||
"password",
|
||||
"db_name",
|
||||
],
|
||||
"min_match": 3,
|
||||
},
|
||||
"api_config": {
|
||||
"keys": ["base_url", "api_key", "api_secret", "timeout", "retry", "endpoint"],
|
||||
"keys": [
|
||||
"base_url",
|
||||
"api_key",
|
||||
"api_secret",
|
||||
"timeout",
|
||||
"retry",
|
||||
"endpoint",
|
||||
],
|
||||
"min_match": 2,
|
||||
},
|
||||
"logging_config": {
|
||||
@@ -822,7 +872,9 @@ def main():
|
||||
print("\n📊 Summary:")
|
||||
print(f" Config files found: {result.total_files}")
|
||||
print(f" Total settings: {result.total_settings}")
|
||||
print(f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}")
|
||||
print(
|
||||
f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}"
|
||||
)
|
||||
|
||||
if "ai_enhancements" in output_dict:
|
||||
print(f" ✨ AI enhancements: Yes ({enhance_mode} mode)")
|
||||
|
||||
@@ -148,7 +148,9 @@ def infer_description_from_docs(
|
||||
|
||||
|
||||
class DocToSkillConverter:
|
||||
def __init__(self, config: dict[str, Any], dry_run: bool = False, resume: bool = False) -> None:
|
||||
def __init__(
|
||||
self, config: dict[str, Any], dry_run: bool = False, resume: bool = False
|
||||
) -> None:
|
||||
self.config = config
|
||||
self.name = config["name"]
|
||||
self.base_url = config["base_url"]
|
||||
@@ -163,7 +165,9 @@ class DocToSkillConverter:
|
||||
# Checkpoint config
|
||||
checkpoint_config = config.get("checkpoint", {})
|
||||
self.checkpoint_enabled = checkpoint_config.get("enabled", False)
|
||||
self.checkpoint_interval = checkpoint_config.get("interval", DEFAULT_CHECKPOINT_INTERVAL)
|
||||
self.checkpoint_interval = checkpoint_config.get(
|
||||
"interval", DEFAULT_CHECKPOINT_INTERVAL
|
||||
)
|
||||
|
||||
# llms.txt detection state
|
||||
skip_llms_txt_value = config.get("skip_llms_txt", False)
|
||||
@@ -318,7 +322,9 @@ class DocToSkillConverter:
|
||||
for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
|
||||
text = self.clean_text(h.get_text())
|
||||
if text:
|
||||
page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")})
|
||||
page["headings"].append(
|
||||
{"level": h.name, "text": text, "id": h.get("id", "")}
|
||||
)
|
||||
|
||||
# Extract code with language detection
|
||||
code_selector = selectors.get("code_blocks", "pre code")
|
||||
@@ -385,7 +391,9 @@ class DocToSkillConverter:
|
||||
import re
|
||||
|
||||
# Detect if content is actually HTML (some .md URLs return HTML)
|
||||
if content.strip().startswith("<!DOCTYPE") or content.strip().startswith("<html"):
|
||||
if content.strip().startswith("<!DOCTYPE") or content.strip().startswith(
|
||||
"<html"
|
||||
):
|
||||
return self._extract_html_as_markdown(content, url)
|
||||
|
||||
page = {
|
||||
@@ -413,14 +421,20 @@ class DocToSkillConverter:
|
||||
level = len(match.group(1))
|
||||
text = match.group(2).strip()
|
||||
page["headings"].append(
|
||||
{"level": f"h{level}", "text": text, "id": text.lower().replace(" ", "-")}
|
||||
{
|
||||
"level": f"h{level}",
|
||||
"text": text,
|
||||
"id": text.lower().replace(" ", "-"),
|
||||
}
|
||||
)
|
||||
|
||||
# Extract code blocks with language
|
||||
code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
|
||||
for lang, code in code_blocks:
|
||||
if len(code.strip()) > 10:
|
||||
page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"})
|
||||
page["code_samples"].append(
|
||||
{"code": code.strip(), "language": lang or "unknown"}
|
||||
)
|
||||
|
||||
# Extract content (paragraphs)
|
||||
content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
|
||||
@@ -444,7 +458,11 @@ class DocToSkillConverter:
|
||||
# Strip anchor fragments
|
||||
full_url = full_url.split("#")[0]
|
||||
# Only include .md URLs to avoid client-side rendered HTML pages
|
||||
if ".md" in full_url and self.is_valid_url(full_url) and full_url not in page["links"]:
|
||||
if (
|
||||
".md" in full_url
|
||||
and self.is_valid_url(full_url)
|
||||
and full_url not in page["links"]
|
||||
):
|
||||
page["links"].append(full_url)
|
||||
|
||||
return page
|
||||
@@ -508,14 +526,18 @@ class DocToSkillConverter:
|
||||
for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
|
||||
text = self.clean_text(h.get_text())
|
||||
if text:
|
||||
page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")})
|
||||
page["headings"].append(
|
||||
{"level": h.name, "text": text, "id": h.get("id", "")}
|
||||
)
|
||||
|
||||
# Extract code blocks
|
||||
for code_elem in main.select("pre code, pre"):
|
||||
code = code_elem.get_text()
|
||||
if len(code.strip()) > 10:
|
||||
lang = self.detect_language(code_elem, code)
|
||||
page["code_samples"].append({"code": code.strip(), "language": lang})
|
||||
page["code_samples"].append(
|
||||
{"code": code.strip(), "language": lang}
|
||||
)
|
||||
|
||||
# Extract paragraphs
|
||||
paragraphs = []
|
||||
@@ -536,7 +558,9 @@ class DocToSkillConverter:
|
||||
|
||||
# Log low-confidence detections for debugging
|
||||
if confidence < 0.5:
|
||||
logger.debug(f"Low confidence language detection: {lang} ({confidence:.2f})")
|
||||
logger.debug(
|
||||
f"Low confidence language detection: {lang} ({confidence:.2f})"
|
||||
)
|
||||
|
||||
return lang # Return string for backward compatibility
|
||||
|
||||
@@ -549,7 +573,10 @@ class DocToSkillConverter:
|
||||
# Look for "Example:" or "Pattern:" sections
|
||||
for elem in main.find_all(["p", "div"]):
|
||||
text = elem.get_text().lower()
|
||||
if any(word in text for word in ["example:", "pattern:", "usage:", "typical use"]):
|
||||
if any(
|
||||
word in text
|
||||
for word in ["example:", "pattern:", "usage:", "typical use"]
|
||||
):
|
||||
# Get the code that follows
|
||||
next_code = elem.find_next(["pre", "code"])
|
||||
if next_code:
|
||||
@@ -571,7 +598,9 @@ class DocToSkillConverter:
|
||||
"""Save page data (skip pages with empty content)"""
|
||||
# Skip pages with empty or very short content
|
||||
if not page.get("content") or len(page.get("content", "")) < 50:
|
||||
logger.debug("Skipping page with empty/short content: %s", page.get("url", "unknown"))
|
||||
logger.debug(
|
||||
"Skipping page with empty/short content: %s", page.get("url", "unknown")
|
||||
)
|
||||
return
|
||||
|
||||
url_hash = hashlib.md5(page["url"].encode()).hexdigest()[:10]
|
||||
@@ -619,7 +648,10 @@ class DocToSkillConverter:
|
||||
|
||||
# Add new URLs
|
||||
for link in page["links"]:
|
||||
if link not in self.visited_urls and link not in self.pending_urls:
|
||||
if (
|
||||
link not in self.visited_urls
|
||||
and link not in self.pending_urls
|
||||
):
|
||||
self.pending_urls.append(link)
|
||||
else:
|
||||
# Single-threaded mode (no lock needed)
|
||||
@@ -640,7 +672,9 @@ class DocToSkillConverter:
|
||||
except Exception as e:
|
||||
if self.workers > 1:
|
||||
with self.lock:
|
||||
logger.error(" ✗ Error scraping %s: %s: %s", url, type(e).__name__, e)
|
||||
logger.error(
|
||||
" ✗ Error scraping %s: %s: %s", url, type(e).__name__, e
|
||||
)
|
||||
else:
|
||||
logger.error(" ✗ Error scraping page: %s: %s", type(e).__name__, e)
|
||||
logger.error(" URL: %s", url)
|
||||
@@ -715,7 +749,8 @@ class DocToSkillConverter:
|
||||
md_urls.append(md_url)
|
||||
|
||||
logger.info(
|
||||
" ✓ Converted %d URLs to .md format (will validate during crawl)", len(md_urls)
|
||||
" ✓ Converted %d URLs to .md format (will validate during crawl)",
|
||||
len(md_urls),
|
||||
)
|
||||
return md_urls
|
||||
|
||||
@@ -757,7 +792,9 @@ class DocToSkillConverter:
|
||||
# Check for explicit config URL first
|
||||
explicit_url = self.config.get("llms_txt_url")
|
||||
if explicit_url:
|
||||
logger.info("\n📌 Using explicit llms_txt_url from config: %s", explicit_url)
|
||||
logger.info(
|
||||
"\n📌 Using explicit llms_txt_url from config: %s", explicit_url
|
||||
)
|
||||
|
||||
# Download explicit file first
|
||||
downloader = LlmsTxtDownloader(explicit_url)
|
||||
@@ -779,7 +816,8 @@ class DocToSkillConverter:
|
||||
|
||||
if variants:
|
||||
logger.info(
|
||||
"\n🔍 Found %d total variant(s), downloading remaining...", len(variants)
|
||||
"\n🔍 Found %d total variant(s), downloading remaining...",
|
||||
len(variants),
|
||||
)
|
||||
for variant_info in variants:
|
||||
url = variant_info["url"]
|
||||
@@ -800,7 +838,11 @@ class DocToSkillConverter:
|
||||
)
|
||||
with open(extra_filepath, "w", encoding="utf-8") as f:
|
||||
f.write(extra_content)
|
||||
logger.info(" ✓ %s (%d chars)", extra_filename, len(extra_content))
|
||||
logger.info(
|
||||
" ✓ %s (%d chars)",
|
||||
extra_filename,
|
||||
len(extra_content),
|
||||
)
|
||||
|
||||
# Parse explicit file for skill building
|
||||
parser = LlmsTxtParser(content, self.base_url)
|
||||
@@ -822,7 +864,8 @@ class DocToSkillConverter:
|
||||
self.pending_urls.append(url)
|
||||
|
||||
logger.info(
|
||||
" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls)
|
||||
" 📋 %d URLs added to crawl queue after filtering",
|
||||
len(self.pending_urls),
|
||||
)
|
||||
|
||||
# Return False to trigger HTML scraping with the populated pending_urls
|
||||
@@ -872,7 +915,9 @@ class DocToSkillConverter:
|
||||
logger.info(" ✓ %s (%d chars)", filename, len(content))
|
||||
|
||||
if not downloaded:
|
||||
logger.warning("⚠️ Failed to download any variants, falling back to HTML scraping")
|
||||
logger.warning(
|
||||
"⚠️ Failed to download any variants, falling back to HTML scraping"
|
||||
)
|
||||
return False
|
||||
|
||||
# Save ALL variants to references/
|
||||
@@ -906,7 +951,10 @@ class DocToSkillConverter:
|
||||
if self.is_valid_url(url) and url not in self.visited_urls:
|
||||
self.pending_urls.append(url)
|
||||
|
||||
logger.info(" 📋 %d URLs added to crawl queue after filtering", len(self.pending_urls))
|
||||
logger.info(
|
||||
" 📋 %d URLs added to crawl queue after filtering",
|
||||
len(self.pending_urls),
|
||||
)
|
||||
|
||||
# Return False to trigger HTML scraping with the populated pending_urls
|
||||
self.llms_txt_detected = True
|
||||
@@ -947,7 +995,8 @@ class DocToSkillConverter:
|
||||
llms_result = self._try_llms_txt()
|
||||
if llms_result:
|
||||
logger.info(
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping",
|
||||
self.llms_txt_variant,
|
||||
)
|
||||
self.save_summary()
|
||||
return
|
||||
@@ -983,7 +1032,9 @@ class DocToSkillConverter:
|
||||
|
||||
# Single-threaded mode (original sequential logic)
|
||||
if self.workers <= 1:
|
||||
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
|
||||
while self.pending_urls and (
|
||||
unlimited or len(self.visited_urls) < preview_limit
|
||||
):
|
||||
url = self.pending_urls.popleft()
|
||||
|
||||
if url in self.visited_urls:
|
||||
@@ -995,7 +1046,9 @@ class DocToSkillConverter:
|
||||
# Just show what would be scraped
|
||||
logger.info(" [Preview] %s", url)
|
||||
try:
|
||||
headers = {"User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)"}
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)"
|
||||
}
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
|
||||
@@ -1007,11 +1060,16 @@ class DocToSkillConverter:
|
||||
if main:
|
||||
for link in main.find_all("a", href=True):
|
||||
href = urljoin(url, link["href"])
|
||||
if self.is_valid_url(href) and href not in self.visited_urls:
|
||||
if (
|
||||
self.is_valid_url(href)
|
||||
and href not in self.visited_urls
|
||||
):
|
||||
self.pending_urls.append(href)
|
||||
except Exception as e:
|
||||
# Failed to extract links in fast mode, continue anyway
|
||||
logger.warning("⚠️ Warning: Could not extract links from %s: %s", url, e)
|
||||
logger.warning(
|
||||
"⚠️ Warning: Could not extract links from %s: %s", url, e
|
||||
)
|
||||
else:
|
||||
self.scrape_page(url)
|
||||
self.pages_scraped += 1
|
||||
@@ -1034,7 +1092,9 @@ class DocToSkillConverter:
|
||||
with ThreadPoolExecutor(max_workers=self.workers) as executor:
|
||||
futures = []
|
||||
|
||||
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
|
||||
while self.pending_urls and (
|
||||
unlimited or len(self.visited_urls) < preview_limit
|
||||
):
|
||||
# Get next batch of URLs (thread-safe)
|
||||
batch = []
|
||||
batch_size = min(self.workers * 2, len(self.pending_urls))
|
||||
@@ -1092,9 +1152,14 @@ class DocToSkillConverter:
|
||||
self.pages_scraped += 1
|
||||
|
||||
if self.dry_run:
|
||||
logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
|
||||
logger.info(
|
||||
"\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls)
|
||||
)
|
||||
if len(self.visited_urls) >= preview_limit:
|
||||
logger.info(" (showing first %d, actual scraping may find more)", preview_limit)
|
||||
logger.info(
|
||||
" (showing first %d, actual scraping may find more)",
|
||||
preview_limit,
|
||||
)
|
||||
logger.info("\n💡 To actually scrape, run without --dry-run")
|
||||
else:
|
||||
logger.info("\n✅ Scraped %d pages", len(self.visited_urls))
|
||||
@@ -1114,7 +1179,8 @@ class DocToSkillConverter:
|
||||
llms_result = self._try_llms_txt()
|
||||
if llms_result:
|
||||
logger.info(
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant
|
||||
"\n✅ Used llms.txt (%s) - skipping HTML scraping",
|
||||
self.llms_txt_variant,
|
||||
)
|
||||
self.save_summary()
|
||||
return
|
||||
@@ -1155,7 +1221,9 @@ class DocToSkillConverter:
|
||||
) as client:
|
||||
tasks = []
|
||||
|
||||
while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
|
||||
while self.pending_urls and (
|
||||
unlimited or len(self.visited_urls) < preview_limit
|
||||
):
|
||||
# Get next batch of URLs
|
||||
batch = []
|
||||
batch_size = min(self.workers * 2, len(self.pending_urls))
|
||||
@@ -1191,7 +1259,11 @@ class DocToSkillConverter:
|
||||
logger.info(" [%d pages scraped]", self.pages_scraped)
|
||||
|
||||
# Checkpoint saving
|
||||
if not self.dry_run and self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0:
|
||||
if (
|
||||
not self.dry_run
|
||||
and self.checkpoint_enabled
|
||||
and self.pages_scraped % self.checkpoint_interval == 0
|
||||
):
|
||||
self.save_checkpoint()
|
||||
|
||||
# Wait for any remaining tasks
|
||||
@@ -1199,10 +1271,13 @@ class DocToSkillConverter:
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
if self.dry_run:
|
||||
logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
|
||||
logger.info(
|
||||
"\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls)
|
||||
)
|
||||
if len(self.visited_urls) >= preview_limit:
|
||||
logger.info(
|
||||
" (showing first %d, actual scraping may find more)", int(preview_limit)
|
||||
" (showing first %d, actual scraping may find more)",
|
||||
int(preview_limit),
|
||||
)
|
||||
logger.info("\n💡 To actually scrape, run without --dry-run")
|
||||
else:
|
||||
@@ -1237,7 +1312,10 @@ class DocToSkillConverter:
|
||||
pages.append(json.load(f))
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"⚠️ Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e
|
||||
"⚠️ Error loading scraped data file %s: %s: %s",
|
||||
json_file,
|
||||
type(e).__name__,
|
||||
e,
|
||||
)
|
||||
logger.error(
|
||||
" Suggestion: File may be corrupted, consider re-scraping with --fresh"
|
||||
@@ -1245,7 +1323,9 @@ class DocToSkillConverter:
|
||||
|
||||
return pages
|
||||
|
||||
def smart_categorize(self, pages: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
|
||||
def smart_categorize(
|
||||
self, pages: list[dict[str, Any]]
|
||||
) -> dict[str, list[dict[str, Any]]]:
|
||||
"""Improved categorization with better pattern matching"""
|
||||
category_defs = self.config.get("categories", {})
|
||||
|
||||
@@ -1297,14 +1377,18 @@ class DocToSkillConverter:
|
||||
for page in pages:
|
||||
path = urlparse(page["url"]).path
|
||||
segments = [
|
||||
s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]
|
||||
s
|
||||
for s in path.split("/")
|
||||
if s and s not in ["en", "stable", "latest", "docs"]
|
||||
]
|
||||
|
||||
for seg in segments:
|
||||
url_segments[seg] += 1
|
||||
|
||||
# Top segments become categories
|
||||
top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[:8]
|
||||
top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[
|
||||
:8
|
||||
]
|
||||
|
||||
categories = {}
|
||||
for seg, count in top_segments:
|
||||
@@ -1324,7 +1408,9 @@ class DocToSkillConverter:
|
||||
|
||||
return categories
|
||||
|
||||
def generate_quick_reference(self, pages: list[dict[str, Any]]) -> list[dict[str, str]]:
|
||||
def generate_quick_reference(
|
||||
self, pages: list[dict[str, Any]]
|
||||
) -> list[dict[str, str]]:
|
||||
"""Generate quick reference from common patterns (NEW FEATURE)"""
|
||||
quick_ref = []
|
||||
|
||||
@@ -1393,7 +1479,9 @@ class DocToSkillConverter:
|
||||
logger.info(" ✓ %s.md (%d pages)", category, len(pages))
|
||||
|
||||
def create_enhanced_skill_md(
|
||||
self, categories: dict[str, list[dict[str, Any]]], quick_ref: list[dict[str, str]]
|
||||
self,
|
||||
categories: dict[str, list[dict[str, Any]]],
|
||||
quick_ref: list[dict[str, str]],
|
||||
) -> None:
|
||||
"""Create SKILL.md with actual examples (IMPROVED)"""
|
||||
# Try to infer description if not in config
|
||||
@@ -1404,7 +1492,9 @@ class DocToSkillConverter:
|
||||
if pages:
|
||||
first_page_html = pages[0].get("raw_html", "")
|
||||
break
|
||||
description = infer_description_from_docs(self.base_url, first_page_html, self.name)
|
||||
description = infer_description_from_docs(
|
||||
self.base_url, first_page_html, self.name
|
||||
)
|
||||
else:
|
||||
description = self.config["description"]
|
||||
|
||||
@@ -1412,7 +1502,9 @@ class DocToSkillConverter:
|
||||
example_codes = []
|
||||
for pages in categories.values():
|
||||
for page in pages[:3]: # First 3 pages per category
|
||||
for sample in page.get("code_samples", [])[:2]: # First 2 samples per page
|
||||
for sample in page.get("code_samples", [])[
|
||||
:2
|
||||
]: # First 2 samples per page
|
||||
code = sample.get("code", sample if isinstance(sample, str) else "")
|
||||
lang = sample.get("language", "unknown")
|
||||
if len(code) < 200 and lang != "unknown":
|
||||
@@ -1462,7 +1554,9 @@ This skill should be triggered when:
|
||||
content += pattern.get("code", "")[:300]
|
||||
content += "\n```\n\n"
|
||||
else:
|
||||
content += "*Quick reference patterns will be added as you use the skill.*\n\n"
|
||||
content += (
|
||||
"*Quick reference patterns will be added as you use the skill.*\n\n"
|
||||
)
|
||||
|
||||
# Add example codes from docs
|
||||
if example_codes:
|
||||
@@ -1477,7 +1571,9 @@ This skill includes comprehensive documentation in `references/`:
|
||||
"""
|
||||
|
||||
for cat in sorted(categories.keys()):
|
||||
content += f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n"
|
||||
content += (
|
||||
f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n"
|
||||
)
|
||||
|
||||
content += """
|
||||
Use `view` to read specific reference files when detailed information is needed.
|
||||
@@ -1625,7 +1721,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
|
||||
)
|
||||
|
||||
# Validate base_url
|
||||
if "base_url" in config and not config["base_url"].startswith(("http://", "https://")):
|
||||
if "base_url" in config and not config["base_url"].startswith(
|
||||
("http://", "https://")
|
||||
):
|
||||
errors.append(
|
||||
f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)"
|
||||
)
|
||||
@@ -1648,7 +1746,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
|
||||
errors.append("'url_patterns' must be a dictionary")
|
||||
else:
|
||||
for key in ["include", "exclude"]:
|
||||
if key in config["url_patterns"] and not isinstance(config["url_patterns"][key], list):
|
||||
if key in config["url_patterns"] and not isinstance(
|
||||
config["url_patterns"][key], list
|
||||
):
|
||||
errors.append(f"'url_patterns.{key}' must be a list")
|
||||
|
||||
# Validate categories
|
||||
@@ -1740,12 +1840,18 @@ def load_config(config_path: str) -> dict[str, Any]:
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error("❌ Error: Invalid JSON in config file: %s", config_path)
|
||||
logger.error(" Details: %s", e)
|
||||
logger.error(" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno)
|
||||
logger.error(
|
||||
" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno
|
||||
)
|
||||
sys.exit(1)
|
||||
except FileNotFoundError:
|
||||
logger.error("❌ Error: Config file not found: %s", config_path)
|
||||
logger.error(" Suggestion: Create a config file or use an existing one from configs/")
|
||||
logger.error(" Available configs: react.json, vue.json, django.json, godot.json")
|
||||
logger.error(
|
||||
" Suggestion: Create a config file or use an existing one from configs/"
|
||||
)
|
||||
logger.error(
|
||||
" Available configs: react.json, vue.json, django.json, godot.json"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
# Validate config
|
||||
@@ -1763,7 +1869,9 @@ def load_config(config_path: str) -> dict[str, Any]:
|
||||
logger.error("❌ Configuration validation errors in %s:", config_path)
|
||||
for error in errors:
|
||||
logger.error(" - %s", error)
|
||||
logger.error("\n Suggestion: Fix the above errors or check configs/ for working examples")
|
||||
logger.error(
|
||||
"\n Suggestion: Fix the above errors or check configs/ for working examples"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
return config
|
||||
@@ -1870,10 +1978,16 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--interactive", "-i", action="store_true", help="Interactive configuration mode"
|
||||
"--interactive",
|
||||
"-i",
|
||||
action="store_true",
|
||||
help="Interactive configuration mode",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)"
|
||||
"--config",
|
||||
"-c",
|
||||
type=str,
|
||||
help="Load configuration from file (e.g., configs/godot.json)",
|
||||
)
|
||||
parser.add_argument("--name", type=str, help="Skill name")
|
||||
parser.add_argument("--url", type=str, help="Base documentation URL")
|
||||
@@ -1902,14 +2016,18 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
help="Open terminal window for enhancement (use with --enhance-local)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)"
|
||||
"--api-key",
|
||||
type=str,
|
||||
help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
action="store_true",
|
||||
help="Resume from last checkpoint (for interrupted scrapes)",
|
||||
)
|
||||
parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh")
|
||||
parser.add_argument(
|
||||
"--fresh", action="store_true", help="Clear checkpoint and start fresh"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rate-limit",
|
||||
"-r",
|
||||
@@ -1936,10 +2054,16 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
||||
help="Disable rate limiting completely (same as --rate-limit 0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)"
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="store_true",
|
||||
help="Enable verbose output (DEBUG level logging)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)"
|
||||
"--quiet",
|
||||
"-q",
|
||||
action="store_true",
|
||||
help="Minimize output (WARNING level logging only)",
|
||||
)
|
||||
|
||||
return parser
|
||||
@@ -2002,11 +2126,15 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
|
||||
if args.workers:
|
||||
# Validate workers count
|
||||
if args.workers < 1:
|
||||
logger.error("❌ Error: --workers must be at least 1 (got %d)", args.workers)
|
||||
logger.error(
|
||||
"❌ Error: --workers must be at least 1 (got %d)", args.workers
|
||||
)
|
||||
logger.error(" Suggestion: Use --workers 1 (default) or omit the flag")
|
||||
sys.exit(1)
|
||||
if args.workers > 10:
|
||||
logger.warning("⚠️ Warning: --workers capped at 10 (requested %d)", args.workers)
|
||||
logger.warning(
|
||||
"⚠️ Warning: --workers capped at 10 (requested %d)", args.workers
|
||||
)
|
||||
args.workers = 10
|
||||
config["workers"] = args.workers
|
||||
if args.workers > 1:
|
||||
@@ -2160,7 +2288,11 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non
|
||||
logger.info("=" * 60 + "\n")
|
||||
|
||||
try:
|
||||
enhance_cmd = ["python3", "cli/enhance_skill.py", f"output/{config['name']}/"]
|
||||
enhance_cmd = [
|
||||
"python3",
|
||||
"cli/enhance_skill.py",
|
||||
f"output/{config['name']}/",
|
||||
]
|
||||
if args.api_key:
|
||||
enhance_cmd.extend(["--api-key", args.api_key])
|
||||
|
||||
@@ -2204,9 +2336,14 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non
|
||||
# Suggest enhancement if not done
|
||||
if not args.enhance and not args.enhance_local:
|
||||
logger.info("\n💡 Optional: Enhance SKILL.md with Claude:")
|
||||
logger.info(" Local (recommended): skill-seekers-enhance output/%s/", config["name"])
|
||||
logger.info(
|
||||
" Local (recommended): skill-seekers-enhance output/%s/", config["name"]
|
||||
)
|
||||
logger.info(" or re-run with: --enhance-local")
|
||||
logger.info(" API-based: skill-seekers-enhance-api output/%s/", config["name"])
|
||||
logger.info(
|
||||
" API-based: skill-seekers-enhance-api output/%s/",
|
||||
config["name"],
|
||||
)
|
||||
logger.info(" or re-run with: --enhance")
|
||||
logger.info(
|
||||
"\n💡 Tip: Use --interactive-enhancement with --enhance-local to open terminal window"
|
||||
|
||||
@@ -79,7 +79,9 @@ class WorkflowStep:
|
||||
setup_required: str | None = None
|
||||
explanation: str | None = None # Why this step matters
|
||||
common_pitfall: str | None = None # Warning for this step
|
||||
common_variations: list[str] = field(default_factory=list) # AI: Alternative approaches
|
||||
common_variations: list[str] = field(
|
||||
default_factory=list
|
||||
) # AI: Alternative approaches
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -221,7 +223,9 @@ class WorkflowAnalyzer:
|
||||
# Check if next statement is assertion (verification)
|
||||
idx = statements.index(stmt)
|
||||
verification = None
|
||||
if idx + 1 < len(statements) and isinstance(statements[idx + 1], ast.Assert):
|
||||
if idx + 1 < len(statements) and isinstance(
|
||||
statements[idx + 1], ast.Assert
|
||||
):
|
||||
verification = ast.get_source_segment(code, statements[idx + 1])
|
||||
|
||||
steps.append(
|
||||
@@ -240,7 +244,9 @@ class WorkflowAnalyzer:
|
||||
|
||||
return steps
|
||||
|
||||
def _extract_steps_heuristic(self, code: str, _workflow: dict) -> list[WorkflowStep]:
|
||||
def _extract_steps_heuristic(
|
||||
self, code: str, _workflow: dict
|
||||
) -> list[WorkflowStep]:
|
||||
"""Extract steps using heuristics (for non-Python or invalid syntax)"""
|
||||
steps = []
|
||||
lines = code.split("\n")
|
||||
@@ -259,7 +265,11 @@ class WorkflowAnalyzer:
|
||||
description = self._infer_description_from_code(step_code)
|
||||
|
||||
steps.append(
|
||||
WorkflowStep(step_number=step_num, code=step_code, description=description)
|
||||
WorkflowStep(
|
||||
step_number=step_num,
|
||||
code=step_code,
|
||||
description=description,
|
||||
)
|
||||
)
|
||||
step_num += 1
|
||||
current_step = []
|
||||
@@ -272,7 +282,9 @@ class WorkflowAnalyzer:
|
||||
step_code = "\n".join(current_step)
|
||||
description = self._infer_description_from_code(step_code)
|
||||
steps.append(
|
||||
WorkflowStep(step_number=step_num, code=step_code, description=description)
|
||||
WorkflowStep(
|
||||
step_number=step_num, code=step_code, description=description
|
||||
)
|
||||
)
|
||||
|
||||
return steps
|
||||
@@ -336,7 +348,11 @@ class WorkflowAnalyzer:
|
||||
|
||||
def _detect_prerequisites(self, workflow: dict) -> dict:
|
||||
"""Detect prerequisites from workflow"""
|
||||
metadata = {"prerequisites": [], "required_imports": [], "required_fixtures": []}
|
||||
metadata = {
|
||||
"prerequisites": [],
|
||||
"required_imports": [],
|
||||
"required_fixtures": [],
|
||||
}
|
||||
|
||||
# Get dependencies from workflow
|
||||
dependencies = workflow.get("dependencies", [])
|
||||
@@ -438,7 +454,9 @@ class WorkflowGrouper:
|
||||
groups = self._group_by_file_path(workflows)
|
||||
return groups
|
||||
|
||||
def _group_by_ai_tutorial_group(self, workflows: list[dict]) -> dict[str, list[dict]]:
|
||||
def _group_by_ai_tutorial_group(
|
||||
self, workflows: list[dict]
|
||||
) -> dict[str, list[dict]]:
|
||||
"""Group by AI-generated tutorial_group (from C3.6 enhancement)"""
|
||||
groups = defaultdict(list)
|
||||
ungrouped = []
|
||||
@@ -866,7 +884,10 @@ class HowToGuideBuilder:
|
||||
if not workflows:
|
||||
logger.warning("No workflow examples found!")
|
||||
return GuideCollection(
|
||||
total_guides=0, guides_by_complexity={}, guides_by_use_case={}, guides=[]
|
||||
total_guides=0,
|
||||
guides_by_complexity={},
|
||||
guides_by_use_case={},
|
||||
guides=[],
|
||||
)
|
||||
|
||||
# Group workflows
|
||||
@@ -893,7 +914,9 @@ class HowToGuideBuilder:
|
||||
"""Filter to workflow category only"""
|
||||
return [ex for ex in examples if ex.get("category") == "workflow"]
|
||||
|
||||
def _create_guide(self, title: str, workflows: list[dict], enhancer=None) -> HowToGuide:
|
||||
def _create_guide(
|
||||
self, title: str, workflows: list[dict], enhancer=None
|
||||
) -> HowToGuide:
|
||||
"""
|
||||
Generate single guide from workflow(s).
|
||||
|
||||
@@ -928,7 +951,8 @@ class HowToGuideBuilder:
|
||||
# Extract source files
|
||||
source_files = [w.get("file_path", "") for w in workflows]
|
||||
source_files = [
|
||||
f"{Path(f).name}:{w.get('line_start', 0)}" for f, w in zip(source_files, workflows, strict=False)
|
||||
f"{Path(f).name}:{w.get('line_start', 0)}"
|
||||
for f, w in zip(source_files, workflows, strict=False)
|
||||
]
|
||||
|
||||
# Create guide
|
||||
@@ -950,14 +974,18 @@ class HowToGuideBuilder:
|
||||
|
||||
# Add AI enhancements if enhancer is available
|
||||
if enhancer:
|
||||
self._enhance_guide_with_ai(guide, primary_workflow.get("ai_analysis", {}), enhancer)
|
||||
self._enhance_guide_with_ai(
|
||||
guide, primary_workflow.get("ai_analysis", {}), enhancer
|
||||
)
|
||||
elif self.enhance_with_ai and primary_workflow.get("ai_analysis"):
|
||||
# Fallback to old enhancement method (basic)
|
||||
self._enhance_guide_with_ai_basic(guide, primary_workflow["ai_analysis"])
|
||||
|
||||
return guide
|
||||
|
||||
def _generate_overview(self, primary_workflow: dict, _all_workflows: list[dict]) -> str:
|
||||
def _generate_overview(
|
||||
self, primary_workflow: dict, _all_workflows: list[dict]
|
||||
) -> str:
|
||||
"""Generate guide overview"""
|
||||
# Try to get explanation from AI analysis
|
||||
if primary_workflow.get("ai_analysis"):
|
||||
@@ -991,7 +1019,10 @@ class HowToGuideBuilder:
|
||||
# Prepare guide data for enhancer
|
||||
guide_data = {
|
||||
"title": guide.title,
|
||||
"steps": [{"description": step.description, "code": step.code} for step in guide.steps],
|
||||
"steps": [
|
||||
{"description": step.description, "code": step.code}
|
||||
for step in guide.steps
|
||||
],
|
||||
"language": "python", # TODO: Detect from code
|
||||
"prerequisites": guide.prerequisites,
|
||||
"description": guide.overview,
|
||||
@@ -1024,7 +1055,9 @@ class HowToGuideBuilder:
|
||||
if "use_cases" in enhanced_data:
|
||||
guide.use_cases = enhanced_data["use_cases"]
|
||||
|
||||
logger.info(f"✨ Enhanced guide '{guide.title}' with comprehensive AI improvements")
|
||||
logger.info(
|
||||
f"✨ Enhanced guide '{guide.title}' with comprehensive AI improvements"
|
||||
)
|
||||
|
||||
def _enhance_guide_with_ai_basic(self, guide: HowToGuide, ai_analysis: dict):
|
||||
"""
|
||||
@@ -1089,7 +1122,9 @@ class HowToGuideBuilder:
|
||||
|
||||
for guide in guides:
|
||||
# Generate filename from title
|
||||
filename = guide.title.lower().replace(" ", "-").replace(":", "") + ".md"
|
||||
filename = (
|
||||
guide.title.lower().replace(" ", "-").replace(":", "") + ".md"
|
||||
)
|
||||
file_path = use_case_dir / filename
|
||||
|
||||
# Generate and save markdown
|
||||
@@ -1100,7 +1135,9 @@ class HowToGuideBuilder:
|
||||
index_markdown = self.generator.generate_index(collection.guides)
|
||||
(output_dir / "index.md").write_text(index_markdown, encoding="utf-8")
|
||||
|
||||
logger.info(f"✅ Saved {collection.total_guides} guides + index to {output_dir}")
|
||||
logger.info(
|
||||
f"✅ Saved {collection.total_guides} guides + index to {output_dir}"
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@@ -1142,11 +1179,15 @@ Grouping Strategies:
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"input", nargs="?", help="Input: directory with test files OR test_examples.json file"
|
||||
"input",
|
||||
nargs="?",
|
||||
help="Input: directory with test files OR test_examples.json file",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--input", dest="input_file", help="Input JSON file with test examples (from C3.2)"
|
||||
"--input",
|
||||
dest="input_file",
|
||||
help="Input JSON file with test examples (from C3.2)",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -1165,7 +1206,9 @@ Grouping Strategies:
|
||||
parser.add_argument("--no-ai", action="store_true", help="Disable AI enhancement")
|
||||
|
||||
parser.add_argument(
|
||||
"--json-output", action="store_true", help="Output JSON summary instead of markdown files"
|
||||
"--json-output",
|
||||
action="store_true",
|
||||
help="Output JSON summary instead of markdown files",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -1201,7 +1244,9 @@ Grouping Strategies:
|
||||
# Extract from directory using test example extractor
|
||||
print("⚠️ Directory input requires test example extractor")
|
||||
print(" Please use test_examples.json output from C3.2")
|
||||
print(f" Or run: skill-seekers extract-test-examples {input_path} --json > examples.json")
|
||||
print(
|
||||
f" Or run: skill-seekers extract-test-examples {input_path} --json > examples.json"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
else:
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""ABOUTME: Downloads llms.txt files from documentation URLs with retry logic"""
|
||||
|
||||
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""ABOUTME: Parses llms.txt markdown content into structured page data"""
|
||||
|
||||
|
||||
import re
|
||||
from urllib.parse import urljoin
|
||||
|
||||
@@ -128,7 +127,9 @@ class LlmsTxtParser:
|
||||
# Extract code blocks
|
||||
code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
|
||||
for lang, code in code_blocks:
|
||||
page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"})
|
||||
page["code_samples"].append(
|
||||
{"code": code.strip(), "language": lang or "unknown"}
|
||||
)
|
||||
|
||||
# Extract h2/h3 headings
|
||||
headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE)
|
||||
@@ -145,7 +146,9 @@ class LlmsTxtParser:
|
||||
content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
|
||||
|
||||
# Extract paragraphs
|
||||
paragraphs = [p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20]
|
||||
paragraphs = [
|
||||
p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20
|
||||
]
|
||||
page["content"] = "\n\n".join(paragraphs)
|
||||
|
||||
return page
|
||||
|
||||
@@ -237,7 +237,9 @@ class PatternRecognizer:
|
||||
self.detectors.append(TemplateMethodDetector(self.depth))
|
||||
self.detectors.append(ChainOfResponsibilityDetector(self.depth))
|
||||
|
||||
def analyze_file(self, file_path: str, content: str, language: str) -> PatternReport:
|
||||
def analyze_file(
|
||||
self, file_path: str, content: str, language: str
|
||||
) -> PatternReport:
|
||||
"""
|
||||
Analyze a single file for design patterns.
|
||||
|
||||
@@ -428,7 +430,9 @@ class SingletonDetector(BasePatternDetector):
|
||||
# Python: __init__ or __new__
|
||||
# Java/C#: private constructor (detected by naming)
|
||||
# Check if it has logic (not just pass)
|
||||
if method.name in ["__new__", "__init__", "constructor"] and (method.docstring or len(method.parameters) > 1):
|
||||
if method.name in ["__new__", "__init__", "constructor"] and (
|
||||
method.docstring or len(method.parameters) > 1
|
||||
):
|
||||
evidence.append(f"Controlled initialization: {method.name}")
|
||||
confidence += 0.3
|
||||
has_init_control = True
|
||||
@@ -535,17 +539,19 @@ class FactoryDetector(BasePatternDetector):
|
||||
for method in class_sig.methods:
|
||||
method_lower = method.name.lower()
|
||||
# Check if method returns something (has return type or is not void)
|
||||
if any(name in method_lower for name in factory_method_names) and (method.return_type or "create" in method_lower):
|
||||
if any(name in method_lower for name in factory_method_names) and (
|
||||
method.return_type or "create" in method_lower
|
||||
):
|
||||
return PatternInstance(
|
||||
pattern_type=self.pattern_type,
|
||||
category=self.category,
|
||||
confidence=0.6,
|
||||
location="",
|
||||
class_name=class_sig.name,
|
||||
method_name=method.name,
|
||||
line_number=method.line_number,
|
||||
evidence=[f"Factory method detected: {method.name}"],
|
||||
)
|
||||
pattern_type=self.pattern_type,
|
||||
category=self.category,
|
||||
confidence=0.6,
|
||||
location="",
|
||||
class_name=class_sig.name,
|
||||
method_name=method.name,
|
||||
line_number=method.line_number,
|
||||
evidence=[f"Factory method detected: {method.name}"],
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
@@ -575,7 +581,9 @@ class FactoryDetector(BasePatternDetector):
|
||||
|
||||
# Check if multiple factory methods exist (Abstract Factory pattern)
|
||||
if len(factory_methods) >= 2:
|
||||
evidence.append(f"Multiple factory methods: {', '.join(factory_methods[:3])}")
|
||||
evidence.append(
|
||||
f"Multiple factory methods: {', '.join(factory_methods[:3])}"
|
||||
)
|
||||
confidence += 0.2
|
||||
|
||||
# Check for inheritance (factory hierarchy)
|
||||
@@ -682,7 +690,13 @@ class ObserverDetector(BasePatternDetector):
|
||||
has_notify = False
|
||||
|
||||
attach_names = ["attach", "add", "subscribe", "register", "addeventlistener"]
|
||||
detach_names = ["detach", "remove", "unsubscribe", "unregister", "removeeventlistener"]
|
||||
detach_names = [
|
||||
"detach",
|
||||
"remove",
|
||||
"unsubscribe",
|
||||
"unregister",
|
||||
"removeeventlistener",
|
||||
]
|
||||
notify_names = ["notify", "update", "emit", "publish", "fire", "trigger"]
|
||||
|
||||
for method in class_sig.methods:
|
||||
@@ -786,25 +800,35 @@ class StrategyDetector(BasePatternDetector):
|
||||
]
|
||||
|
||||
if siblings:
|
||||
evidence.append(f"Part of strategy family with: {', '.join(siblings[:3])}")
|
||||
evidence.append(
|
||||
f"Part of strategy family with: {', '.join(siblings[:3])}"
|
||||
)
|
||||
confidence += 0.5
|
||||
|
||||
if base_class and ("strategy" in base_class.lower() or "policy" in base_class.lower()):
|
||||
if base_class and (
|
||||
"strategy" in base_class.lower() or "policy" in base_class.lower()
|
||||
):
|
||||
evidence.append(f"Inherits from strategy base: {base_class}")
|
||||
confidence += 0.3
|
||||
|
||||
# Check if this is a strategy base class
|
||||
# (has subclasses in same file)
|
||||
subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes]
|
||||
subclasses = [
|
||||
cls.name for cls in all_classes if class_sig.name in cls.base_classes
|
||||
]
|
||||
|
||||
if len(subclasses) >= 2:
|
||||
evidence.append(f"Strategy base with implementations: {', '.join(subclasses[:3])}")
|
||||
evidence.append(
|
||||
f"Strategy base with implementations: {', '.join(subclasses[:3])}"
|
||||
)
|
||||
confidence += 0.6
|
||||
|
||||
# Check for single dominant method (strategy interface)
|
||||
if len(class_sig.methods) == 1 or len(class_sig.methods) == 2:
|
||||
# Single method or method + __init__
|
||||
main_method = [m for m in class_sig.methods if m.name not in ["__init__", "__new__"]]
|
||||
main_method = [
|
||||
m for m in class_sig.methods if m.name not in ["__init__", "__new__"]
|
||||
]
|
||||
if main_method:
|
||||
evidence.append(f"Strategy interface method: {main_method[0].name}")
|
||||
confidence += 0.2
|
||||
@@ -1274,7 +1298,9 @@ class TemplateMethodDetector(BasePatternDetector):
|
||||
class_lower = class_sig.name.lower()
|
||||
if any(keyword in class_lower for keyword in template_keywords):
|
||||
# Check if has subclasses
|
||||
subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes]
|
||||
subclasses = [
|
||||
cls.name for cls in all_classes if class_sig.name in cls.base_classes
|
||||
]
|
||||
|
||||
if subclasses:
|
||||
return PatternInstance(
|
||||
@@ -1284,7 +1310,9 @@ class TemplateMethodDetector(BasePatternDetector):
|
||||
location="",
|
||||
class_name=class_sig.name,
|
||||
line_number=class_sig.line_number,
|
||||
evidence=[f"Abstract base with subclasses: {', '.join(subclasses[:2])}"],
|
||||
evidence=[
|
||||
f"Abstract base with subclasses: {', '.join(subclasses[:2])}"
|
||||
],
|
||||
related_classes=subclasses,
|
||||
)
|
||||
|
||||
@@ -1301,7 +1329,9 @@ class TemplateMethodDetector(BasePatternDetector):
|
||||
# 3. Has template method that orchestrates
|
||||
|
||||
# Check for subclasses
|
||||
subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes]
|
||||
subclasses = [
|
||||
cls.name for cls in all_classes if class_sig.name in cls.base_classes
|
||||
]
|
||||
|
||||
if len(subclasses) >= 1:
|
||||
evidence.append(f"Base class with {len(subclasses)} implementations")
|
||||
@@ -1437,7 +1467,8 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
|
||||
|
||||
# Check for set_next() method
|
||||
has_set_next = any(
|
||||
"next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower())
|
||||
"next" in m.name.lower()
|
||||
and ("set" in m.name.lower() or "add" in m.name.lower())
|
||||
for m in class_sig.methods
|
||||
)
|
||||
|
||||
@@ -1458,7 +1489,9 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
|
||||
]
|
||||
|
||||
if siblings and has_next_ref:
|
||||
evidence.append(f"Part of handler chain with: {', '.join(siblings[:2])}")
|
||||
evidence.append(
|
||||
f"Part of handler chain with: {', '.join(siblings[:2])}"
|
||||
)
|
||||
confidence += 0.2
|
||||
|
||||
if confidence >= 0.5:
|
||||
@@ -1515,7 +1548,11 @@ class LanguageAdapter:
|
||||
pattern.confidence = min(pattern.confidence + 0.1, 1.0)
|
||||
|
||||
# Strategy: Duck typing common in Python
|
||||
elif pattern.pattern_type == "Strategy" and "duck typing" in evidence_str or "protocol" in evidence_str:
|
||||
elif (
|
||||
pattern.pattern_type == "Strategy"
|
||||
and "duck typing" in evidence_str
|
||||
or "protocol" in evidence_str
|
||||
):
|
||||
pattern.confidence = min(pattern.confidence + 0.05, 1.0)
|
||||
|
||||
# JavaScript/TypeScript adaptations
|
||||
@@ -1532,7 +1569,11 @@ class LanguageAdapter:
|
||||
pattern.confidence = min(pattern.confidence + 0.05, 1.0)
|
||||
|
||||
# Observer: Event emitters are built-in
|
||||
elif pattern.pattern_type == "Observer" and "eventemitter" in evidence_str or "event" in evidence_str:
|
||||
elif (
|
||||
pattern.pattern_type == "Observer"
|
||||
and "eventemitter" in evidence_str
|
||||
or "event" in evidence_str
|
||||
):
|
||||
pattern.confidence = min(pattern.confidence + 0.1, 1.0)
|
||||
pattern.evidence.append("EventEmitter pattern detected")
|
||||
|
||||
@@ -1549,7 +1590,9 @@ class LanguageAdapter:
|
||||
pattern.evidence.append("Abstract Factory pattern")
|
||||
|
||||
# Template Method: Abstract classes common
|
||||
elif pattern.pattern_type == "TemplateMethod" and "abstract" in evidence_str:
|
||||
elif (
|
||||
pattern.pattern_type == "TemplateMethod" and "abstract" in evidence_str
|
||||
):
|
||||
pattern.confidence = min(pattern.confidence + 0.1, 1.0)
|
||||
|
||||
# Go adaptations
|
||||
@@ -1602,7 +1645,9 @@ class LanguageAdapter:
|
||||
pattern.evidence.append("Ruby Singleton module")
|
||||
|
||||
# Builder: Method chaining is idiomatic
|
||||
elif pattern.pattern_type == "Builder" and "method chaining" in evidence_str:
|
||||
elif (
|
||||
pattern.pattern_type == "Builder" and "method chaining" in evidence_str
|
||||
):
|
||||
pattern.confidence = min(pattern.confidence + 0.05, 1.0)
|
||||
|
||||
# PHP adaptations
|
||||
@@ -1653,9 +1698,13 @@ Supported Languages:
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--file", action="append", help="Source file to analyze (can be specified multiple times)"
|
||||
"--file",
|
||||
action="append",
|
||||
help="Source file to analyze (can be specified multiple times)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--directory", help="Directory to analyze (analyzes all source files)"
|
||||
)
|
||||
parser.add_argument("--directory", help="Directory to analyze (analyzes all source files)")
|
||||
parser.add_argument(
|
||||
"--output", help="Output directory for results (default: current directory)"
|
||||
)
|
||||
@@ -1666,7 +1715,9 @@ Supported Languages:
|
||||
help="Detection depth: surface (fast), deep (default), full (thorough)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--json", action="store_true", help="Output JSON format instead of human-readable"
|
||||
"--json",
|
||||
action="store_true",
|
||||
help="Output JSON format instead of human-readable",
|
||||
)
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
|
||||
|
||||
|
||||
@@ -194,11 +194,15 @@ class PythonTestAnalyzer:
|
||||
for node in ast.walk(tree):
|
||||
if isinstance(node, ast.ClassDef):
|
||||
if self._is_test_class(node):
|
||||
examples.extend(self._extract_from_test_class(node, file_path, imports))
|
||||
examples.extend(
|
||||
self._extract_from_test_class(node, file_path, imports)
|
||||
)
|
||||
|
||||
# Find test functions (pytest)
|
||||
elif isinstance(node, ast.FunctionDef) and self._is_test_function(node):
|
||||
examples.extend(self._extract_from_test_function(node, file_path, imports))
|
||||
examples.extend(
|
||||
self._extract_from_test_function(node, file_path, imports)
|
||||
)
|
||||
|
||||
return examples
|
||||
|
||||
@@ -232,7 +236,9 @@ class PythonTestAnalyzer:
|
||||
return True
|
||||
# Has @pytest.mark decorator
|
||||
for decorator in node.decorator_list:
|
||||
if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse(decorator):
|
||||
if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse(
|
||||
decorator
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -249,7 +255,9 @@ class PythonTestAnalyzer:
|
||||
for node in class_node.body:
|
||||
if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
|
||||
examples.extend(
|
||||
self._analyze_test_body(node, file_path, imports, setup_code=setup_code)
|
||||
self._analyze_test_body(
|
||||
node, file_path, imports, setup_code=setup_code
|
||||
)
|
||||
)
|
||||
|
||||
return examples
|
||||
@@ -261,7 +269,9 @@ class PythonTestAnalyzer:
|
||||
# Check for fixture parameters
|
||||
fixture_setup = self._extract_fixtures(func_node)
|
||||
|
||||
return self._analyze_test_body(func_node, file_path, imports, setup_code=fixture_setup)
|
||||
return self._analyze_test_body(
|
||||
func_node, file_path, imports, setup_code=fixture_setup
|
||||
)
|
||||
|
||||
def _extract_setup_method(self, class_node: ast.ClassDef) -> str | None:
|
||||
"""Extract setUp method code"""
|
||||
@@ -318,7 +328,9 @@ class PythonTestAnalyzer:
|
||||
examples.extend(configs)
|
||||
|
||||
# 4. Multi-step workflows (integration tests)
|
||||
workflows = self._find_workflows(func_node, file_path, docstring, setup_code, tags, imports)
|
||||
workflows = self._find_workflows(
|
||||
func_node, file_path, docstring, setup_code, tags, imports
|
||||
)
|
||||
examples.extend(workflows)
|
||||
|
||||
return examples
|
||||
@@ -362,7 +374,11 @@ class PythonTestAnalyzer:
|
||||
|
||||
for node in ast.walk(func_node):
|
||||
# Check if meaningful instantiation
|
||||
if isinstance(node, ast.Assign) and isinstance(node.value, ast.Call) and self._is_meaningful_instantiation(node):
|
||||
if (
|
||||
isinstance(node, ast.Assign)
|
||||
and isinstance(node.value, ast.Call)
|
||||
and self._is_meaningful_instantiation(node)
|
||||
):
|
||||
code = ast.unparse(node)
|
||||
|
||||
# Skip trivial or mock-only
|
||||
@@ -408,7 +424,11 @@ class PythonTestAnalyzer:
|
||||
statements = func_node.body
|
||||
for i, stmt in enumerate(statements):
|
||||
# Look for method calls and check if next statement is an assertion
|
||||
if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call) and i + 1 < len(statements):
|
||||
if (
|
||||
isinstance(stmt, ast.Expr)
|
||||
and isinstance(stmt.value, ast.Call)
|
||||
and i + 1 < len(statements)
|
||||
):
|
||||
next_stmt = statements[i + 1]
|
||||
if self._is_assertion(next_stmt):
|
||||
method_call = ast.unparse(stmt)
|
||||
@@ -455,7 +475,11 @@ class PythonTestAnalyzer:
|
||||
|
||||
for node in ast.walk(func_node):
|
||||
# Must have 2+ keys and be meaningful
|
||||
if isinstance(node, ast.Assign) and isinstance(node.value, ast.Dict) and len(node.value.keys) >= 2:
|
||||
if (
|
||||
isinstance(node, ast.Assign)
|
||||
and isinstance(node.value, ast.Dict)
|
||||
and len(node.value.keys) >= 2
|
||||
):
|
||||
code = ast.unparse(node)
|
||||
|
||||
# Check if looks like configuration
|
||||
@@ -467,7 +491,9 @@ class PythonTestAnalyzer:
|
||||
code=code,
|
||||
language="Python",
|
||||
description=f"Configuration example: {description}",
|
||||
expected_behavior=self._extract_assertion_after(func_node, node),
|
||||
expected_behavior=self._extract_assertion_after(
|
||||
func_node, node
|
||||
),
|
||||
setup_code=setup_code,
|
||||
file_path=file_path,
|
||||
line_start=node.lineno,
|
||||
@@ -568,7 +594,9 @@ class PythonTestAnalyzer:
|
||||
integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"]
|
||||
return any(keyword in test_name for keyword in integration_keywords)
|
||||
|
||||
def _extract_assertion_after(self, func_node: ast.FunctionDef, target_node: ast.AST) -> str:
|
||||
def _extract_assertion_after(
|
||||
self, func_node: ast.FunctionDef, target_node: ast.AST
|
||||
) -> str:
|
||||
"""Find assertion that follows the target node"""
|
||||
found_target = False
|
||||
for stmt in func_node.body:
|
||||
@@ -699,7 +727,8 @@ class GenericTestAnalyzer:
|
||||
code=config_match.group(0),
|
||||
language=language,
|
||||
file_path=file_path,
|
||||
line_number=code[: start_pos + config_match.start()].count("\n") + 1,
|
||||
line_number=code[: start_pos + config_match.start()].count("\n")
|
||||
+ 1,
|
||||
)
|
||||
examples.append(example)
|
||||
|
||||
@@ -842,7 +871,9 @@ class TestExampleExtractor:
|
||||
logger.warning(f"⚠️ Failed to initialize AI enhancer: {e}")
|
||||
self.enhance_with_ai = False
|
||||
|
||||
def extract_from_directory(self, directory: Path, recursive: bool = True) -> ExampleReport:
|
||||
def extract_from_directory(
|
||||
self, directory: Path, recursive: bool = True
|
||||
) -> ExampleReport:
|
||||
"""Extract examples from all test files in directory"""
|
||||
directory = Path(directory)
|
||||
|
||||
@@ -896,11 +927,13 @@ class TestExampleExtractor:
|
||||
# Limit per file
|
||||
if len(filtered_examples) > self.max_per_file:
|
||||
# Sort by confidence and take top N
|
||||
filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[
|
||||
: self.max_per_file
|
||||
]
|
||||
filtered_examples = sorted(
|
||||
filtered_examples, key=lambda x: x.confidence, reverse=True
|
||||
)[: self.max_per_file]
|
||||
|
||||
logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}")
|
||||
logger.info(
|
||||
f"Extracted {len(filtered_examples)} examples from {file_path.name}"
|
||||
)
|
||||
|
||||
return filtered_examples
|
||||
|
||||
@@ -955,7 +988,9 @@ class TestExampleExtractor:
|
||||
|
||||
# Calculate averages
|
||||
avg_complexity = (
|
||||
sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
|
||||
sum(ex.complexity_score for ex in examples) / len(examples)
|
||||
if examples
|
||||
else 0.0
|
||||
)
|
||||
high_value_count = sum(1 for ex in examples if ex.confidence > 0.7)
|
||||
|
||||
@@ -1009,10 +1044,15 @@ Examples:
|
||||
help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)"
|
||||
"--max-per-file",
|
||||
type=int,
|
||||
default=10,
|
||||
help="Maximum examples per file (default: 10)",
|
||||
)
|
||||
parser.add_argument("--json", action="store_true", help="Output JSON format")
|
||||
parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
|
||||
parser.add_argument(
|
||||
"--markdown", action="store_true", help="Output Markdown format"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--recursive",
|
||||
action="store_true",
|
||||
@@ -1029,7 +1069,9 @@ Examples:
|
||||
# Create extractor
|
||||
languages = [args.language] if args.language else None
|
||||
extractor = TestExampleExtractor(
|
||||
min_confidence=args.min_confidence, max_per_file=args.max_per_file, languages=languages
|
||||
min_confidence=args.min_confidence,
|
||||
max_per_file=args.max_per_file,
|
||||
languages=languages,
|
||||
)
|
||||
|
||||
# Extract examples
|
||||
@@ -1037,7 +1079,9 @@ Examples:
|
||||
examples = extractor.extract_from_file(Path(args.file))
|
||||
report = extractor._create_report(examples, file_path=args.file)
|
||||
else:
|
||||
report = extractor.extract_from_directory(Path(args.directory), recursive=args.recursive)
|
||||
report = extractor.extract_from_directory(
|
||||
Path(args.directory), recursive=args.recursive
|
||||
)
|
||||
|
||||
# Output results
|
||||
if args.json:
|
||||
|
||||
@@ -95,13 +95,20 @@ class UnifiedCodebaseAnalyzer:
|
||||
# Step 1: Acquire source
|
||||
if self.is_github_url(source):
|
||||
print("📦 Source type: GitHub repository")
|
||||
return self._analyze_github(source, depth, fetch_github_metadata, output_dir, interactive)
|
||||
return self._analyze_github(
|
||||
source, depth, fetch_github_metadata, output_dir, interactive
|
||||
)
|
||||
else:
|
||||
print("📁 Source type: Local directory")
|
||||
return self._analyze_local(source, depth)
|
||||
|
||||
def _analyze_github(
|
||||
self, repo_url: str, depth: str, fetch_metadata: bool, output_dir: Path | None, interactive: bool = True
|
||||
self,
|
||||
repo_url: str,
|
||||
depth: str,
|
||||
fetch_metadata: bool,
|
||||
output_dir: Path | None,
|
||||
interactive: bool = True,
|
||||
) -> AnalysisResult:
|
||||
"""
|
||||
Analyze GitHub repository with three-stream fetcher.
|
||||
@@ -117,7 +124,9 @@ class UnifiedCodebaseAnalyzer:
|
||||
AnalysisResult with all 3 streams
|
||||
"""
|
||||
# Use three-stream fetcher
|
||||
fetcher = GitHubThreeStreamFetcher(repo_url, self.github_token, interactive=interactive)
|
||||
fetcher = GitHubThreeStreamFetcher(
|
||||
repo_url, self.github_token, interactive=interactive
|
||||
)
|
||||
three_streams = fetcher.fetch(output_dir)
|
||||
|
||||
# Analyze code with specified depth
|
||||
@@ -236,7 +245,9 @@ class UnifiedCodebaseAnalyzer:
|
||||
basic = self.basic_analysis(directory)
|
||||
|
||||
# Run full C3.x analysis using existing codebase_scraper
|
||||
print("🔍 Running C3.x components (patterns, examples, guides, configs, architecture)...")
|
||||
print(
|
||||
"🔍 Running C3.x components (patterns, examples, guides, configs, architecture)..."
|
||||
)
|
||||
|
||||
try:
|
||||
# Import codebase analyzer
|
||||
@@ -271,11 +282,19 @@ class UnifiedCodebaseAnalyzer:
|
||||
c3x = {**basic, "analysis_type": "c3x", **c3x_data}
|
||||
|
||||
print("✅ C3.x analysis complete!")
|
||||
print(f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected")
|
||||
print(f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted")
|
||||
print(f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated")
|
||||
print(
|
||||
f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected"
|
||||
)
|
||||
print(
|
||||
f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted"
|
||||
)
|
||||
print(
|
||||
f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated"
|
||||
)
|
||||
print(f" - {len(c3x_data.get('c3_4_configs', []))} config files analyzed")
|
||||
print(f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found")
|
||||
print(
|
||||
f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found"
|
||||
)
|
||||
|
||||
return c3x
|
||||
|
||||
@@ -432,7 +451,9 @@ class UnifiedCodebaseAnalyzer:
|
||||
|
||||
if item.is_dir():
|
||||
# Only include immediate subdirectories
|
||||
structure["children"].append({"name": item.name, "type": "directory"})
|
||||
structure["children"].append(
|
||||
{"name": item.name, "type": "directory"}
|
||||
)
|
||||
elif item.is_file():
|
||||
structure["children"].append(
|
||||
{"name": item.name, "type": "file", "extension": item.suffix}
|
||||
@@ -526,7 +547,12 @@ class UnifiedCodebaseAnalyzer:
|
||||
Returns:
|
||||
Dict with statistics
|
||||
"""
|
||||
stats = {"total_files": 0, "total_size_bytes": 0, "file_types": {}, "languages": {}}
|
||||
stats = {
|
||||
"total_files": 0,
|
||||
"total_size_bytes": 0,
|
||||
"file_types": {},
|
||||
"languages": {},
|
||||
}
|
||||
|
||||
for file_path in directory.rglob("*"):
|
||||
if not file_path.is_file():
|
||||
|
||||
@@ -29,7 +29,10 @@ from skill_seekers.cli.github_fetcher import (
|
||||
ThreeStreamData,
|
||||
)
|
||||
from skill_seekers.cli.merge_sources import RuleBasedMerger, categorize_issues_by_topic
|
||||
from skill_seekers.cli.unified_codebase_analyzer import AnalysisResult, UnifiedCodebaseAnalyzer
|
||||
from skill_seekers.cli.unified_codebase_analyzer import (
|
||||
AnalysisResult,
|
||||
UnifiedCodebaseAnalyzer,
|
||||
)
|
||||
|
||||
|
||||
class TestScenario1GitHubThreeStream:
|
||||
@@ -67,7 +70,8 @@ class TestScenario1GitHubThreeStream:
|
||||
# Create code files
|
||||
src_dir = repo_dir / "src"
|
||||
src_dir.mkdir()
|
||||
(src_dir / "auth.py").write_text("""
|
||||
(src_dir / "auth.py").write_text(
|
||||
"""
|
||||
# OAuth authentication
|
||||
def google_provider(client_id, client_secret):
|
||||
'''Google OAuth provider'''
|
||||
@@ -76,20 +80,24 @@ def google_provider(client_id, client_secret):
|
||||
def azure_provider(tenant_id, client_id):
|
||||
'''Azure OAuth provider'''
|
||||
return Provider('azure', tenant_id, client_id)
|
||||
""")
|
||||
(src_dir / "async_tools.py").write_text("""
|
||||
"""
|
||||
)
|
||||
(src_dir / "async_tools.py").write_text(
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
async def async_tool():
|
||||
'''Async tool decorator'''
|
||||
await asyncio.sleep(1)
|
||||
return "result"
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
# Create test files
|
||||
tests_dir = repo_dir / "tests"
|
||||
tests_dir.mkdir()
|
||||
(tests_dir / "test_auth.py").write_text("""
|
||||
(tests_dir / "test_auth.py").write_text(
|
||||
"""
|
||||
def test_google_provider():
|
||||
provider = google_provider('id', 'secret')
|
||||
assert provider.name == 'google'
|
||||
@@ -97,10 +105,12 @@ def test_google_provider():
|
||||
def test_azure_provider():
|
||||
provider = azure_provider('tenant', 'id')
|
||||
assert provider.name == 'azure'
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
# Create docs
|
||||
(repo_dir / "README.md").write_text("""
|
||||
(repo_dir / "README.md").write_text(
|
||||
"""
|
||||
# FastMCP
|
||||
|
||||
FastMCP is a Python framework for building MCP servers.
|
||||
@@ -116,26 +126,33 @@ pip install fastmcp
|
||||
- OAuth authentication (Google, Azure, GitHub)
|
||||
- Async/await support
|
||||
- Easy testing with pytest
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
(repo_dir / "CONTRIBUTING.md").write_text("""
|
||||
(repo_dir / "CONTRIBUTING.md").write_text(
|
||||
"""
|
||||
# Contributing
|
||||
|
||||
Please follow these guidelines when contributing.
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
docs_dir = repo_dir / "docs"
|
||||
docs_dir.mkdir()
|
||||
(docs_dir / "oauth.md").write_text("""
|
||||
(docs_dir / "oauth.md").write_text(
|
||||
"""
|
||||
# OAuth Guide
|
||||
|
||||
How to set up OAuth providers.
|
||||
""")
|
||||
(docs_dir / "async.md").write_text("""
|
||||
"""
|
||||
)
|
||||
(docs_dir / "async.md").write_text(
|
||||
"""
|
||||
# Async Guide
|
||||
|
||||
How to use async tools.
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
return repo_dir
|
||||
|
||||
@@ -186,11 +203,15 @@ How to use async tools.
|
||||
],
|
||||
}
|
||||
|
||||
def test_scenario_1_github_three_stream_fetcher(self, mock_github_repo, mock_github_api_data):
|
||||
def test_scenario_1_github_three_stream_fetcher(
|
||||
self, mock_github_repo, mock_github_api_data
|
||||
):
|
||||
"""Test GitHub three-stream fetcher with mock data."""
|
||||
# Create fetcher with mock
|
||||
with (
|
||||
patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
|
||||
patch.object(
|
||||
GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo
|
||||
),
|
||||
patch.object(
|
||||
GitHubThreeStreamFetcher,
|
||||
"fetch_github_metadata",
|
||||
@@ -202,7 +223,9 @@ How to use async tools.
|
||||
return_value=mock_github_api_data["issues"],
|
||||
),
|
||||
):
|
||||
fetcher = GitHubThreeStreamFetcher("https://github.com/jlowin/fastmcp", interactive=False)
|
||||
fetcher = GitHubThreeStreamFetcher(
|
||||
"https://github.com/jlowin/fastmcp", interactive=False
|
||||
)
|
||||
three_streams = fetcher.fetch()
|
||||
|
||||
# Verify 3 streams exist
|
||||
@@ -228,10 +251,14 @@ How to use async tools.
|
||||
assert len(three_streams.insights_stream.known_solutions) >= 1
|
||||
assert len(three_streams.insights_stream.top_labels) >= 2
|
||||
|
||||
def test_scenario_1_unified_analyzer_github(self, mock_github_repo, mock_github_api_data):
|
||||
def test_scenario_1_unified_analyzer_github(
|
||||
self, mock_github_repo, mock_github_api_data
|
||||
):
|
||||
"""Test unified analyzer with GitHub source."""
|
||||
with (
|
||||
patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
|
||||
patch.object(
|
||||
GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo
|
||||
),
|
||||
patch.object(
|
||||
GitHubThreeStreamFetcher,
|
||||
"fetch_github_metadata",
|
||||
@@ -259,16 +286,24 @@ How to use async tools.
|
||||
{"name": "test_azure_provider", "file": "test_auth.py"},
|
||||
],
|
||||
"c3_2_examples_count": 2,
|
||||
"c3_3_guides": [{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}],
|
||||
"c3_3_guides": [
|
||||
{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}
|
||||
],
|
||||
"c3_4_configs": [],
|
||||
"c3_7_architecture": [
|
||||
{"pattern": "Service Layer", "description": "OAuth provider abstraction"}
|
||||
{
|
||||
"pattern": "Service Layer",
|
||||
"description": "OAuth provider abstraction",
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
analyzer = UnifiedCodebaseAnalyzer()
|
||||
result = analyzer.analyze(
|
||||
source="https://github.com/jlowin/fastmcp", depth="c3x", fetch_github_metadata=True, interactive=False
|
||||
source="https://github.com/jlowin/fastmcp",
|
||||
depth="c3x",
|
||||
fetch_github_metadata=True,
|
||||
interactive=False,
|
||||
)
|
||||
|
||||
# Verify result structure
|
||||
@@ -300,7 +335,9 @@ How to use async tools.
|
||||
{
|
||||
"name": "fastmcp-oauth",
|
||||
"description": "OAuth authentication for FastMCP",
|
||||
"categories": {"oauth": ["oauth", "auth", "provider", "google", "azure"]},
|
||||
"categories": {
|
||||
"oauth": ["oauth", "auth", "provider", "google", "azure"]
|
||||
},
|
||||
}
|
||||
)
|
||||
)
|
||||
@@ -454,7 +491,9 @@ pip install fastmcp
|
||||
# Check content quality (Architecture Section 8.2)
|
||||
assert "Issue #42" in router_md, "Missing issue references"
|
||||
assert "⭐" in router_md or "Stars:" in router_md, "Missing GitHub metadata"
|
||||
assert "Quick Start" in router_md or "README" in router_md, "Missing README content"
|
||||
assert (
|
||||
"Quick Start" in router_md or "README" in router_md
|
||||
), "Missing README content"
|
||||
|
||||
|
||||
class TestScenario2MultiSource:
|
||||
@@ -495,14 +534,30 @@ class TestScenario2MultiSource:
|
||||
"""Test categorizing GitHub issues by topic."""
|
||||
problems = [
|
||||
{"number": 42, "title": "OAuth setup fails", "labels": ["oauth", "bug"]},
|
||||
{"number": 38, "title": "Async tools not working", "labels": ["async", "question"]},
|
||||
{"number": 35, "title": "Testing with pytest", "labels": ["testing", "question"]},
|
||||
{"number": 30, "title": "Google OAuth redirect", "labels": ["oauth", "question"]},
|
||||
{
|
||||
"number": 38,
|
||||
"title": "Async tools not working",
|
||||
"labels": ["async", "question"],
|
||||
},
|
||||
{
|
||||
"number": 35,
|
||||
"title": "Testing with pytest",
|
||||
"labels": ["testing", "question"],
|
||||
},
|
||||
{
|
||||
"number": 30,
|
||||
"title": "Google OAuth redirect",
|
||||
"labels": ["oauth", "question"],
|
||||
},
|
||||
]
|
||||
|
||||
solutions = [
|
||||
{"number": 25, "title": "Fixed OAuth redirect", "labels": ["oauth", "bug"]},
|
||||
{"number": 20, "title": "Async timeout solution", "labels": ["async", "bug"]},
|
||||
{
|
||||
"number": 20,
|
||||
"title": "Async timeout solution",
|
||||
"labels": ["async", "bug"],
|
||||
},
|
||||
]
|
||||
|
||||
topics = ["oauth", "async", "testing"]
|
||||
@@ -533,7 +588,12 @@ class TestScenario2MultiSource:
|
||||
def test_scenario_2_conflict_detection(self):
|
||||
"""Test conflict detection between docs and code."""
|
||||
# Mock API data from docs
|
||||
api_data = {"GoogleProvider": {"params": ["app_id", "app_secret"], "source": "html_docs"}}
|
||||
api_data = {
|
||||
"GoogleProvider": {
|
||||
"params": ["app_id", "app_secret"],
|
||||
"source": "html_docs",
|
||||
}
|
||||
}
|
||||
|
||||
# Mock GitHub docs
|
||||
github_docs = {"readme": "Use client_id and client_secret for Google OAuth"}
|
||||
@@ -557,23 +617,33 @@ class TestScenario2MultiSource:
|
||||
# Layer 4: GitHub insights (community knowledge)
|
||||
|
||||
# Mock source 1 (HTML docs)
|
||||
source1_data = {"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]}
|
||||
source1_data = {
|
||||
"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]
|
||||
}
|
||||
|
||||
# Mock source 2 (GitHub C3.x)
|
||||
source2_data = {
|
||||
"api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}]
|
||||
"api": [
|
||||
{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}
|
||||
]
|
||||
}
|
||||
|
||||
# Mock GitHub streams
|
||||
_github_streams = ThreeStreamData(
|
||||
code_stream=CodeStream(directory=Path("/tmp"), files=[]),
|
||||
docs_stream=DocsStream(
|
||||
readme="Use client_id and client_secret", contributing=None, docs_files=[]
|
||||
readme="Use client_id and client_secret",
|
||||
contributing=None,
|
||||
docs_files=[],
|
||||
),
|
||||
insights_stream=InsightsStream(
|
||||
metadata={"stars": 1000},
|
||||
common_problems=[
|
||||
{"number": 42, "title": "OAuth parameter confusion", "labels": ["oauth"]}
|
||||
{
|
||||
"number": 42,
|
||||
"title": "OAuth parameter confusion",
|
||||
"labels": ["oauth"],
|
||||
}
|
||||
],
|
||||
known_solutions=[],
|
||||
top_labels=[],
|
||||
@@ -581,7 +651,9 @@ class TestScenario2MultiSource:
|
||||
)
|
||||
|
||||
# Create merger with required arguments
|
||||
merger = RuleBasedMerger(docs_data=source1_data, github_data=source2_data, conflicts=[])
|
||||
merger = RuleBasedMerger(
|
||||
docs_data=source1_data, github_data=source2_data, conflicts=[]
|
||||
)
|
||||
|
||||
# Merge using merge_all() method
|
||||
merged = merger.merge_all()
|
||||
@@ -625,7 +697,8 @@ class TestScenario3LocalCodebase:
|
||||
# Create source files
|
||||
src_dir = project_dir / "src"
|
||||
src_dir.mkdir()
|
||||
(src_dir / "database.py").write_text("""
|
||||
(src_dir / "database.py").write_text(
|
||||
"""
|
||||
class DatabaseConnection:
|
||||
'''Database connection pool'''
|
||||
def __init__(self, host, port):
|
||||
@@ -635,9 +708,11 @@ class DatabaseConnection:
|
||||
def connect(self):
|
||||
'''Establish connection'''
|
||||
pass
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
(src_dir / "api.py").write_text("""
|
||||
(src_dir / "api.py").write_text(
|
||||
"""
|
||||
from flask import Flask
|
||||
|
||||
app = Flask(__name__)
|
||||
@@ -646,16 +721,19 @@ app = Flask(__name__)
|
||||
def get_users():
|
||||
'''Get all users'''
|
||||
return {'users': []}
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
# Create tests
|
||||
tests_dir = project_dir / "tests"
|
||||
tests_dir.mkdir()
|
||||
(tests_dir / "test_database.py").write_text("""
|
||||
(tests_dir / "test_database.py").write_text(
|
||||
"""
|
||||
def test_connection():
|
||||
conn = DatabaseConnection('localhost', 5432)
|
||||
assert conn.host == 'localhost'
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
return project_dir
|
||||
|
||||
@@ -692,8 +770,12 @@ def test_connection():
|
||||
mock_c3x.return_value = {
|
||||
"files": ["database.py", "api.py"],
|
||||
"analysis_type": "c3x",
|
||||
"c3_1_patterns": [{"name": "Singleton", "count": 1, "file": "database.py"}],
|
||||
"c3_2_examples": [{"name": "test_connection", "file": "test_database.py"}],
|
||||
"c3_1_patterns": [
|
||||
{"name": "Singleton", "count": 1, "file": "database.py"}
|
||||
],
|
||||
"c3_2_examples": [
|
||||
{"name": "test_connection", "file": "test_database.py"}
|
||||
],
|
||||
"c3_2_examples_count": 1,
|
||||
"c3_3_guides": [],
|
||||
"c3_4_configs": [],
|
||||
@@ -885,7 +967,9 @@ Based on analysis of GitHub issues:
|
||||
print(f"\nGitHub overhead: {github_overhead} lines")
|
||||
|
||||
# Architecture target: 20-60 lines
|
||||
assert 20 <= github_overhead <= 60, f"GitHub overhead {github_overhead} not in range 20-60"
|
||||
assert (
|
||||
20 <= github_overhead <= 60
|
||||
), f"GitHub overhead {github_overhead} not in range 20-60"
|
||||
|
||||
def test_router_size_within_limits(self):
|
||||
"""Test router size is 150±20 lines (Architecture Section 8.1, Line 1970)."""
|
||||
@@ -893,7 +977,9 @@ Based on analysis of GitHub issues:
|
||||
router_lines = 150 # Simulated count
|
||||
|
||||
# Architecture target: 150 lines (±20)
|
||||
assert 130 <= router_lines <= 170, f"Router size {router_lines} not in range 130-170"
|
||||
assert (
|
||||
130 <= router_lines <= 170
|
||||
), f"Router size {router_lines} not in range 130-170"
|
||||
|
||||
def test_content_quality_requirements(self):
|
||||
"""Test content quality (Architecture Section 8.2, Lines 1977-2014)."""
|
||||
@@ -935,9 +1021,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
|
||||
|
||||
# Check minimum 3 code examples
|
||||
code_blocks = sub_skill_md.count("```")
|
||||
assert code_blocks >= 6, (
|
||||
f"Need at least 3 code examples (6 markers), found {code_blocks // 2}"
|
||||
)
|
||||
assert (
|
||||
code_blocks >= 6
|
||||
), f"Need at least 3 code examples (6 markers), found {code_blocks // 2}"
|
||||
|
||||
# Check language tags
|
||||
assert "```python" in sub_skill_md, "Code blocks must have language tags"
|
||||
@@ -952,9 +1038,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
|
||||
|
||||
# Check solution indicators for closed issues
|
||||
if "closed" in sub_skill_md.lower():
|
||||
assert "✅" in sub_skill_md or "Solution" in sub_skill_md, (
|
||||
"Closed issues should indicate solution found"
|
||||
)
|
||||
assert (
|
||||
"✅" in sub_skill_md or "Solution" in sub_skill_md
|
||||
), "Closed issues should indicate solution found"
|
||||
|
||||
|
||||
class TestTokenEfficiencyCalculation:
|
||||
@@ -991,9 +1077,9 @@ class TestTokenEfficiencyCalculation:
|
||||
|
||||
# With selective loading and caching, achieve 35-40%
|
||||
# Even conservative estimate shows 29.5%, actual usage patterns show 35-40%
|
||||
assert reduction_percent >= 29, (
|
||||
f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)"
|
||||
)
|
||||
assert (
|
||||
reduction_percent >= 29
|
||||
), f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -103,7 +103,9 @@ class TestAsyncScrapeMethods(unittest.TestCase):
|
||||
os.chdir(tmpdir)
|
||||
converter = DocToSkillConverter(config, dry_run=True)
|
||||
self.assertTrue(hasattr(converter, "scrape_page_async"))
|
||||
self.assertTrue(asyncio.iscoroutinefunction(converter.scrape_page_async))
|
||||
self.assertTrue(
|
||||
asyncio.iscoroutinefunction(converter.scrape_page_async)
|
||||
)
|
||||
finally:
|
||||
os.chdir(self.original_cwd)
|
||||
|
||||
@@ -177,9 +179,12 @@ class TestAsyncRouting(unittest.TestCase):
|
||||
converter = DocToSkillConverter(config, dry_run=True)
|
||||
|
||||
# Mock scrape_all_async to verify it does NOT get called
|
||||
with patch.object(
|
||||
converter, "scrape_all_async", new_callable=AsyncMock
|
||||
) as mock_async, patch.object(converter, "_try_llms_txt", return_value=False):
|
||||
with (
|
||||
patch.object(
|
||||
converter, "scrape_all_async", new_callable=AsyncMock
|
||||
) as mock_async,
|
||||
patch.object(converter, "_try_llms_txt", return_value=False),
|
||||
):
|
||||
converter.scrape_all()
|
||||
# Verify async version was NOT called
|
||||
mock_async.assert_not_called()
|
||||
@@ -258,7 +263,9 @@ class TestAsyncErrorHandling(unittest.TestCase):
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
# Mock client.get to raise exception
|
||||
with patch.object(client, "get", side_effect=httpx.HTTPError("Test error")):
|
||||
with patch.object(
|
||||
client, "get", side_effect=httpx.HTTPError("Test error")
|
||||
):
|
||||
# Should not raise exception, just log error
|
||||
await converter.scrape_page_async(
|
||||
"https://example.com/test", semaphore, client
|
||||
@@ -316,7 +323,10 @@ class TestAsyncLlmsTxtIntegration(unittest.TestCase):
|
||||
converter = DocToSkillConverter(config, dry_run=False)
|
||||
|
||||
# Mock _try_llms_txt to return True (llms.txt found)
|
||||
with patch.object(converter, "_try_llms_txt", return_value=True), patch.object(converter, "save_summary"):
|
||||
with (
|
||||
patch.object(converter, "_try_llms_txt", return_value=True),
|
||||
patch.object(converter, "save_summary"),
|
||||
):
|
||||
converter.scrape_all()
|
||||
# If llms.txt succeeded, async scraping should be skipped
|
||||
# Verify by checking that pages were not scraped
|
||||
|
||||
@@ -62,7 +62,11 @@ class TestGitHubScraperInitialization(unittest.TestCase):
|
||||
|
||||
def test_init_with_token_from_config(self):
|
||||
"""Test initialization with token from config"""
|
||||
config = {"repo": "facebook/react", "name": "react", "github_token": "test_token_123"}
|
||||
config = {
|
||||
"repo": "facebook/react",
|
||||
"name": "react",
|
||||
"github_token": "test_token_123",
|
||||
}
|
||||
|
||||
with patch("skill_seekers.cli.github_scraper.Github") as mock_github:
|
||||
_scraper = self.GitHubScraper(config)
|
||||
@@ -72,7 +76,10 @@ class TestGitHubScraperInitialization(unittest.TestCase):
|
||||
"""Test initialization with token from environment variable"""
|
||||
config = {"repo": "facebook/react", "name": "react", "github_token": None}
|
||||
|
||||
with patch.dict(os.environ, {"GITHUB_TOKEN": "env_token_456"}), patch("skill_seekers.cli.github_scraper.Github") as mock_github:
|
||||
with (
|
||||
patch.dict(os.environ, {"GITHUB_TOKEN": "env_token_456"}),
|
||||
patch("skill_seekers.cli.github_scraper.Github") as mock_github,
|
||||
):
|
||||
_scraper = self.GitHubScraper(config)
|
||||
mock_github.assert_called_once_with("env_token_456")
|
||||
|
||||
@@ -80,14 +87,21 @@ class TestGitHubScraperInitialization(unittest.TestCase):
|
||||
"""Test initialization without authentication"""
|
||||
config = {"repo": "facebook/react", "name": "react", "github_token": None}
|
||||
|
||||
with patch("skill_seekers.cli.github_scraper.Github"), patch.dict(os.environ, {}, clear=True):
|
||||
with (
|
||||
patch("skill_seekers.cli.github_scraper.Github"),
|
||||
patch.dict(os.environ, {}, clear=True),
|
||||
):
|
||||
scraper = self.GitHubScraper(config)
|
||||
# Should create unauthenticated client
|
||||
self.assertIsNotNone(scraper.github)
|
||||
|
||||
def test_token_priority_env_over_config(self):
|
||||
"""Test that GITHUB_TOKEN env var takes priority over config"""
|
||||
config = {"repo": "facebook/react", "name": "react", "github_token": "config_token"}
|
||||
config = {
|
||||
"repo": "facebook/react",
|
||||
"name": "react",
|
||||
"github_token": "config_token",
|
||||
}
|
||||
|
||||
with patch.dict(os.environ, {"GITHUB_TOKEN": "env_token"}):
|
||||
scraper = self.GitHubScraper(config)
|
||||
@@ -120,7 +134,9 @@ class TestREADMEExtraction(unittest.TestCase):
|
||||
scraper._extract_readme()
|
||||
|
||||
self.assertIn("readme", scraper.extracted_data)
|
||||
self.assertEqual(scraper.extracted_data["readme"], "# React\n\nA JavaScript library")
|
||||
self.assertEqual(
|
||||
scraper.extracted_data["readme"], "# React\n\nA JavaScript library"
|
||||
)
|
||||
|
||||
def test_extract_readme_tries_multiple_locations(self):
|
||||
"""Test that README extraction tries multiple file locations"""
|
||||
@@ -177,7 +193,10 @@ class TestLanguageDetection(unittest.TestCase):
|
||||
with patch("skill_seekers.cli.github_scraper.Github"):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_languages.return_value = {"JavaScript": 8000, "TypeScript": 2000}
|
||||
scraper.repo.get_languages.return_value = {
|
||||
"JavaScript": 8000,
|
||||
"TypeScript": 2000,
|
||||
}
|
||||
|
||||
scraper._extract_languages()
|
||||
|
||||
@@ -221,7 +240,12 @@ class TestIssuesExtraction(unittest.TestCase):
|
||||
|
||||
def test_extract_issues_success(self):
|
||||
"""Test successful issues extraction"""
|
||||
config = {"repo": "facebook/react", "name": "react", "github_token": None, "max_issues": 10}
|
||||
config = {
|
||||
"repo": "facebook/react",
|
||||
"name": "react",
|
||||
"github_token": None,
|
||||
"max_issues": 10,
|
||||
}
|
||||
|
||||
# Create mock issues
|
||||
mock_label1 = Mock()
|
||||
@@ -286,7 +310,12 @@ class TestIssuesExtraction(unittest.TestCase):
|
||||
|
||||
def test_extract_issues_filters_pull_requests(self):
|
||||
"""Test that pull requests are filtered out from issues"""
|
||||
config = {"repo": "facebook/react", "name": "react", "github_token": None, "max_issues": 10}
|
||||
config = {
|
||||
"repo": "facebook/react",
|
||||
"name": "react",
|
||||
"github_token": None,
|
||||
"max_issues": 10,
|
||||
}
|
||||
|
||||
# Create mock issue (need all required attributes)
|
||||
mock_issue = Mock()
|
||||
@@ -321,7 +350,12 @@ class TestIssuesExtraction(unittest.TestCase):
|
||||
|
||||
def test_extract_issues_respects_max_limit(self):
|
||||
"""Test that max_issues limit is respected"""
|
||||
config = {"repo": "facebook/react", "name": "react", "github_token": None, "max_issues": 2}
|
||||
config = {
|
||||
"repo": "facebook/react",
|
||||
"name": "react",
|
||||
"github_token": None,
|
||||
"max_issues": 2,
|
||||
}
|
||||
|
||||
# Create 5 mock issues
|
||||
mock_issues = []
|
||||
@@ -443,9 +477,15 @@ class TestReleasesExtraction(unittest.TestCase):
|
||||
mock_release1.prerelease = False
|
||||
mock_release1.created_at = datetime(2023, 3, 1)
|
||||
mock_release1.published_at = datetime(2023, 3, 1)
|
||||
mock_release1.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0"
|
||||
mock_release1.tarball_url = "https://github.com/facebook/react/archive/v18.0.0.tar.gz"
|
||||
mock_release1.zipball_url = "https://github.com/facebook/react/archive/v18.0.0.zip"
|
||||
mock_release1.html_url = (
|
||||
"https://github.com/facebook/react/releases/tag/v18.0.0"
|
||||
)
|
||||
mock_release1.tarball_url = (
|
||||
"https://github.com/facebook/react/archive/v18.0.0.tar.gz"
|
||||
)
|
||||
mock_release1.zipball_url = (
|
||||
"https://github.com/facebook/react/archive/v18.0.0.zip"
|
||||
)
|
||||
|
||||
mock_release2 = Mock()
|
||||
mock_release2.tag_name = "v18.0.0-rc.0"
|
||||
@@ -455,9 +495,15 @@ class TestReleasesExtraction(unittest.TestCase):
|
||||
mock_release2.prerelease = True
|
||||
mock_release2.created_at = datetime(2023, 2, 1)
|
||||
mock_release2.published_at = datetime(2023, 2, 1)
|
||||
mock_release2.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0-rc.0"
|
||||
mock_release2.tarball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz"
|
||||
mock_release2.zipball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.zip"
|
||||
mock_release2.html_url = (
|
||||
"https://github.com/facebook/react/releases/tag/v18.0.0-rc.0"
|
||||
)
|
||||
mock_release2.tarball_url = (
|
||||
"https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz"
|
||||
)
|
||||
mock_release2.zipball_url = (
|
||||
"https://github.com/facebook/react/archive/v18.0.0-rc.0.zip"
|
||||
)
|
||||
|
||||
with patch("skill_seekers.cli.github_scraper.Github"):
|
||||
scraper = self.GitHubScraper(config)
|
||||
@@ -566,7 +612,9 @@ class TestGitHubToSkillConverter(unittest.TestCase):
|
||||
config = {"repo": "facebook/react", "name": "test", "description": "Test skill"}
|
||||
|
||||
# Override data file path
|
||||
with patch("skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__") as mock_init:
|
||||
with patch(
|
||||
"skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__"
|
||||
) as mock_init:
|
||||
mock_init.return_value = None
|
||||
converter = self.GitHubToSkillConverter(config)
|
||||
converter.data_file = str(self.data_file)
|
||||
@@ -733,7 +781,8 @@ class TestSymlinkHandling(unittest.TestCase):
|
||||
# Should successfully extract README content
|
||||
self.assertIn("readme", scraper.extracted_data)
|
||||
self.assertEqual(
|
||||
scraper.extracted_data["readme"], "# AI SDK\n\nThe AI SDK is a TypeScript toolkit"
|
||||
scraper.extracted_data["readme"],
|
||||
"# AI SDK\n\nThe AI SDK is a TypeScript toolkit",
|
||||
)
|
||||
|
||||
def test_extract_changelog_with_symlink(self):
|
||||
@@ -815,7 +864,8 @@ class TestSymlinkHandling(unittest.TestCase):
|
||||
# Should download via download_url
|
||||
self.assertEqual(result, "# Changelog\n\n## v1.0.0\n- Initial release")
|
||||
mock_requests.assert_called_once_with(
|
||||
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md", timeout=30
|
||||
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md",
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
def test_extract_changelog_large_file(self):
|
||||
@@ -950,7 +1000,9 @@ class TestErrorHandling(unittest.TestCase):
|
||||
with patch("skill_seekers.cli.github_scraper.Github"):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = None
|
||||
scraper.github.get_repo = Mock(side_effect=GithubException(404, "Not found"))
|
||||
scraper.github.get_repo = Mock(
|
||||
side_effect=GithubException(404, "Not found")
|
||||
)
|
||||
|
||||
# Should raise ValueError with helpful message
|
||||
with self.assertRaises(ValueError) as context:
|
||||
@@ -960,12 +1012,19 @@ class TestErrorHandling(unittest.TestCase):
|
||||
|
||||
def test_rate_limit_error(self):
|
||||
"""Test handling of rate limit errors"""
|
||||
config = {"repo": "facebook/react", "name": "react", "github_token": None, "max_issues": 10}
|
||||
config = {
|
||||
"repo": "facebook/react",
|
||||
"name": "react",
|
||||
"github_token": None,
|
||||
"max_issues": 10,
|
||||
}
|
||||
|
||||
with patch("skill_seekers.cli.github_scraper.Github"):
|
||||
scraper = self.GitHubScraper(config)
|
||||
scraper.repo = Mock()
|
||||
scraper.repo.get_issues.side_effect = GithubException(403, "Rate limit exceeded")
|
||||
scraper.repo.get_issues.side_effect = GithubException(
|
||||
403, "Rate limit exceeded"
|
||||
)
|
||||
|
||||
# Should handle gracefully and log warning
|
||||
scraper._extract_issues()
|
||||
|
||||
@@ -28,9 +28,13 @@ class TestGuideEnhancerModeDetection:
|
||||
|
||||
def test_auto_mode_with_api_key(self):
|
||||
"""Test auto mode detects API when key present and library available"""
|
||||
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic:
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
|
||||
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
|
||||
patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic,
|
||||
):
|
||||
mock_anthropic.Anthropic = Mock()
|
||||
enhancer = GuideEnhancer(mode="auto")
|
||||
# Will be 'api' if library available, otherwise 'local' or 'none'
|
||||
@@ -80,7 +84,12 @@ class TestGuideEnhancerStepDescriptions:
|
||||
def test_enhance_step_descriptions_none_mode(self):
|
||||
"""Test step descriptions in none mode returns empty"""
|
||||
enhancer = GuideEnhancer(mode="none")
|
||||
steps = [{"description": "scraper.scrape(url)", "code": "result = scraper.scrape(url)"}]
|
||||
steps = [
|
||||
{
|
||||
"description": "scraper.scrape(url)",
|
||||
"code": "result = scraper.scrape(url)",
|
||||
}
|
||||
]
|
||||
result = enhancer.enhance_step_descriptions(steps)
|
||||
assert result == []
|
||||
|
||||
@@ -99,9 +108,13 @@ class TestGuideEnhancerStepDescriptions:
|
||||
}
|
||||
)
|
||||
|
||||
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic:
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
|
||||
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
|
||||
patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic,
|
||||
):
|
||||
mock_anthropic.Anthropic = Mock()
|
||||
enhancer = GuideEnhancer(mode="api")
|
||||
if enhancer.mode != "api":
|
||||
@@ -163,9 +176,13 @@ class TestGuideEnhancerTroubleshooting:
|
||||
}
|
||||
)
|
||||
|
||||
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic:
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
|
||||
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
|
||||
patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic,
|
||||
):
|
||||
mock_anthropic.Anthropic = Mock()
|
||||
enhancer = GuideEnhancer(mode="api")
|
||||
if enhancer.mode != "api":
|
||||
@@ -175,7 +192,9 @@ class TestGuideEnhancerTroubleshooting:
|
||||
|
||||
guide_data = {
|
||||
"title": "Test Guide",
|
||||
"steps": [{"description": "import requests", "code": "import requests"}],
|
||||
"steps": [
|
||||
{"description": "import requests", "code": "import requests"}
|
||||
],
|
||||
"language": "python",
|
||||
}
|
||||
result = enhancer.enhance_troubleshooting(guide_data)
|
||||
@@ -224,9 +243,13 @@ class TestGuideEnhancerPrerequisites:
|
||||
}
|
||||
)
|
||||
|
||||
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic:
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
|
||||
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
|
||||
patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic,
|
||||
):
|
||||
mock_anthropic.Anthropic = Mock()
|
||||
enhancer = GuideEnhancer(mode="api")
|
||||
if enhancer.mode != "api":
|
||||
@@ -267,9 +290,13 @@ class TestGuideEnhancerNextSteps:
|
||||
}
|
||||
)
|
||||
|
||||
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic:
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
|
||||
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
|
||||
patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic,
|
||||
):
|
||||
mock_anthropic.Anthropic = Mock()
|
||||
enhancer = GuideEnhancer(mode="api")
|
||||
if enhancer.mode != "api":
|
||||
@@ -277,7 +304,10 @@ class TestGuideEnhancerNextSteps:
|
||||
|
||||
enhancer.client = Mock()
|
||||
|
||||
guide_data = {"title": "How to Scrape Docs", "description": "Basic scraping"}
|
||||
guide_data = {
|
||||
"title": "How to Scrape Docs",
|
||||
"description": "Basic scraping",
|
||||
}
|
||||
result = enhancer.enhance_next_steps(guide_data)
|
||||
|
||||
assert len(result) == 3
|
||||
@@ -307,9 +337,13 @@ class TestGuideEnhancerUseCases:
|
||||
}
|
||||
)
|
||||
|
||||
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic:
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
|
||||
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
|
||||
patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic,
|
||||
):
|
||||
mock_anthropic.Anthropic = Mock()
|
||||
enhancer = GuideEnhancer(mode="api")
|
||||
if enhancer.mode != "api":
|
||||
@@ -358,7 +392,11 @@ class TestGuideEnhancerFullWorkflow:
|
||||
mock_call.return_value = json.dumps(
|
||||
{
|
||||
"step_descriptions": [
|
||||
{"step_index": 0, "explanation": "Import required libraries", "variations": []},
|
||||
{
|
||||
"step_index": 0,
|
||||
"explanation": "Import required libraries",
|
||||
"variations": [],
|
||||
},
|
||||
{
|
||||
"step_index": 1,
|
||||
"explanation": "Initialize scraper instance",
|
||||
@@ -374,16 +412,24 @@ class TestGuideEnhancerFullWorkflow:
|
||||
}
|
||||
],
|
||||
"prerequisites_detailed": [
|
||||
{"name": "requests", "why": "HTTP client", "setup": "pip install requests"}
|
||||
{
|
||||
"name": "requests",
|
||||
"why": "HTTP client",
|
||||
"setup": "pip install requests",
|
||||
}
|
||||
],
|
||||
"next_steps": ["How to add authentication"],
|
||||
"use_cases": ["Automate documentation extraction"],
|
||||
}
|
||||
)
|
||||
|
||||
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic:
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
|
||||
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
|
||||
patch(
|
||||
"skill_seekers.cli.guide_enhancer.anthropic", create=True
|
||||
) as mock_anthropic,
|
||||
):
|
||||
mock_anthropic.Anthropic = Mock()
|
||||
enhancer = GuideEnhancer(mode="api")
|
||||
if enhancer.mode != "api":
|
||||
@@ -415,7 +461,9 @@ class TestGuideEnhancerFullWorkflow:
|
||||
"""Test graceful fallback on enhancement error"""
|
||||
enhancer = GuideEnhancer(mode="none")
|
||||
|
||||
with patch.object(enhancer, "enhance_guide", side_effect=Exception("API error")):
|
||||
with patch.object(
|
||||
enhancer, "enhance_guide", side_effect=Exception("API error")
|
||||
):
|
||||
guide_data = {
|
||||
"title": "Test",
|
||||
"steps": [],
|
||||
@@ -485,7 +533,9 @@ class TestGuideEnhancerPromptGeneration:
|
||||
|
||||
guide_data = {
|
||||
"title": "How to Test",
|
||||
"steps": [{"description": "Write test", "code": "def test_example(): pass"}],
|
||||
"steps": [
|
||||
{"description": "Write test", "code": "def test_example(): pass"}
|
||||
],
|
||||
"language": "python",
|
||||
"prerequisites": ["pytest"],
|
||||
}
|
||||
@@ -533,7 +583,9 @@ class TestGuideEnhancerResponseParsing:
|
||||
|
||||
response = json.dumps(
|
||||
{
|
||||
"step_descriptions": [{"step_index": 0, "explanation": "Test", "variations": []}],
|
||||
"step_descriptions": [
|
||||
{"step_index": 0, "explanation": "Test", "variations": []}
|
||||
],
|
||||
"troubleshooting": [],
|
||||
"prerequisites_detailed": [],
|
||||
"next_steps": [],
|
||||
|
||||
@@ -174,7 +174,9 @@ class TestInstallToAgent:
|
||||
self.skill_dir.mkdir()
|
||||
|
||||
# Create SKILL.md
|
||||
(self.skill_dir / "SKILL.md").write_text("# Test Skill\n\nThis is a test skill.")
|
||||
(self.skill_dir / "SKILL.md").write_text(
|
||||
"# Test Skill\n\nThis is a test skill."
|
||||
)
|
||||
|
||||
# Create references directory with files
|
||||
refs_dir = self.skill_dir / "references"
|
||||
@@ -195,8 +197,13 @@ class TestInstallToAgent:
|
||||
with tempfile.TemporaryDirectory() as agent_tmpdir:
|
||||
agent_path = Path(agent_tmpdir) / ".claude" / "skills"
|
||||
|
||||
with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path):
|
||||
success, message = install_to_agent(self.skill_dir, "claude", force=True)
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
return_value=agent_path,
|
||||
):
|
||||
success, message = install_to_agent(
|
||||
self.skill_dir, "claude", force=True
|
||||
)
|
||||
|
||||
assert success is True
|
||||
target_path = agent_path / "test-skill"
|
||||
@@ -208,8 +215,13 @@ class TestInstallToAgent:
|
||||
with tempfile.TemporaryDirectory() as agent_tmpdir:
|
||||
agent_path = Path(agent_tmpdir) / ".claude" / "skills"
|
||||
|
||||
with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path):
|
||||
success, message = install_to_agent(self.skill_dir, "claude", force=True)
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
return_value=agent_path,
|
||||
):
|
||||
success, message = install_to_agent(
|
||||
self.skill_dir, "claude", force=True
|
||||
)
|
||||
|
||||
assert success is True
|
||||
target_path = agent_path / "test-skill"
|
||||
@@ -230,8 +242,13 @@ class TestInstallToAgent:
|
||||
with tempfile.TemporaryDirectory() as agent_tmpdir:
|
||||
agent_path = Path(agent_tmpdir) / ".claude" / "skills"
|
||||
|
||||
with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path):
|
||||
success, message = install_to_agent(self.skill_dir, "claude", force=True)
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
return_value=agent_path,
|
||||
):
|
||||
success, message = install_to_agent(
|
||||
self.skill_dir, "claude", force=True
|
||||
)
|
||||
|
||||
assert success is True
|
||||
target_path = agent_path / "test-skill"
|
||||
@@ -248,8 +265,13 @@ class TestInstallToAgent:
|
||||
target_path = agent_path / "test-skill"
|
||||
target_path.mkdir(parents=True)
|
||||
|
||||
with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path):
|
||||
success, message = install_to_agent(self.skill_dir, "claude", force=False)
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
return_value=agent_path,
|
||||
):
|
||||
success, message = install_to_agent(
|
||||
self.skill_dir, "claude", force=False
|
||||
)
|
||||
|
||||
assert success is False
|
||||
assert "already installed" in message.lower()
|
||||
@@ -263,8 +285,13 @@ class TestInstallToAgent:
|
||||
target_path.mkdir(parents=True)
|
||||
(target_path / "old_file.txt").write_text("old content")
|
||||
|
||||
with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path):
|
||||
success, message = install_to_agent(self.skill_dir, "claude", force=True)
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
return_value=agent_path,
|
||||
):
|
||||
success, message = install_to_agent(
|
||||
self.skill_dir, "claude", force=True
|
||||
)
|
||||
|
||||
assert success is True
|
||||
# Old file should be gone
|
||||
@@ -297,8 +324,13 @@ class TestInstallToAgent:
|
||||
with tempfile.TemporaryDirectory() as agent_tmpdir:
|
||||
agent_path = Path(agent_tmpdir) / ".claude" / "skills"
|
||||
|
||||
with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path):
|
||||
success, message = install_to_agent(self.skill_dir, "claude", dry_run=True)
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
return_value=agent_path,
|
||||
):
|
||||
success, message = install_to_agent(
|
||||
self.skill_dir, "claude", dry_run=True
|
||||
)
|
||||
|
||||
assert success is True
|
||||
assert "DRY RUN" in message
|
||||
@@ -329,7 +361,8 @@ class TestInstallToAllAgents:
|
||||
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
|
||||
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
side_effect=mock_get_agent_path,
|
||||
):
|
||||
results = install_to_all_agents(self.skill_dir, force=True)
|
||||
|
||||
@@ -360,7 +393,8 @@ class TestInstallToAllAgents:
|
||||
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
|
||||
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
side_effect=mock_get_agent_path,
|
||||
):
|
||||
# Without force - should fail
|
||||
results_no_force = install_to_all_agents(self.skill_dir, force=False)
|
||||
@@ -415,7 +449,10 @@ class TestInstallAgentCLI:
|
||||
|
||||
def test_cli_requires_agent_flag(self):
|
||||
"""Test that CLI fails without --agent flag."""
|
||||
with pytest.raises(SystemExit) as exc_info, patch("sys.argv", ["install_agent.py", str(self.skill_dir)]):
|
||||
with (
|
||||
pytest.raises(SystemExit) as exc_info,
|
||||
patch("sys.argv", ["install_agent.py", str(self.skill_dir)]),
|
||||
):
|
||||
main()
|
||||
|
||||
# Missing required argument exits with code 2
|
||||
@@ -428,17 +465,29 @@ class TestInstallAgentCLI:
|
||||
def mock_get_agent_path(agent_name, _project_root=None):
|
||||
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
|
||||
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
|
||||
), patch(
|
||||
"sys.argv",
|
||||
["install_agent.py", str(self.skill_dir), "--agent", "claude", "--dry-run"],
|
||||
with (
|
||||
patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
side_effect=mock_get_agent_path,
|
||||
),
|
||||
patch(
|
||||
"sys.argv",
|
||||
[
|
||||
"install_agent.py",
|
||||
str(self.skill_dir),
|
||||
"--agent",
|
||||
"claude",
|
||||
"--dry-run",
|
||||
],
|
||||
),
|
||||
):
|
||||
exit_code = main()
|
||||
|
||||
assert exit_code == 0
|
||||
# Directory should NOT be created
|
||||
assert not (Path(agent_tmpdir) / ".claude" / "skills" / "test-skill").exists()
|
||||
assert not (
|
||||
Path(agent_tmpdir) / ".claude" / "skills" / "test-skill"
|
||||
).exists()
|
||||
|
||||
def test_cli_integration(self):
|
||||
"""Test end-to-end CLI execution."""
|
||||
@@ -447,11 +496,21 @@ class TestInstallAgentCLI:
|
||||
def mock_get_agent_path(agent_name, _project_root=None):
|
||||
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
|
||||
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
|
||||
), patch(
|
||||
"sys.argv",
|
||||
["install_agent.py", str(self.skill_dir), "--agent", "claude", "--force"],
|
||||
with (
|
||||
patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
side_effect=mock_get_agent_path,
|
||||
),
|
||||
patch(
|
||||
"sys.argv",
|
||||
[
|
||||
"install_agent.py",
|
||||
str(self.skill_dir),
|
||||
"--agent",
|
||||
"claude",
|
||||
"--force",
|
||||
],
|
||||
),
|
||||
):
|
||||
exit_code = main()
|
||||
|
||||
@@ -468,11 +527,21 @@ class TestInstallAgentCLI:
|
||||
def mock_get_agent_path(agent_name, _project_root=None):
|
||||
return Path(agent_tmpdir) / f".{agent_name}" / "skills"
|
||||
|
||||
with patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path
|
||||
), patch(
|
||||
"sys.argv",
|
||||
["install_agent.py", str(self.skill_dir), "--agent", "all", "--force"],
|
||||
with (
|
||||
patch(
|
||||
"skill_seekers.cli.install_agent.get_agent_path",
|
||||
side_effect=mock_get_agent_path,
|
||||
),
|
||||
patch(
|
||||
"sys.argv",
|
||||
[
|
||||
"install_agent.py",
|
||||
str(self.skill_dir),
|
||||
"--agent",
|
||||
"all",
|
||||
"--force",
|
||||
],
|
||||
),
|
||||
):
|
||||
exit_code = main()
|
||||
|
||||
|
||||
@@ -50,7 +50,9 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase):
|
||||
# Mock large CHANGELOG (1.4MB, encoding="none")
|
||||
mock_content = Mock()
|
||||
mock_content.type = "file"
|
||||
mock_content.encoding = "none" # This is what GitHub API returns for large files
|
||||
mock_content.encoding = (
|
||||
"none" # This is what GitHub API returns for large files
|
||||
)
|
||||
mock_content.size = 1388271
|
||||
mock_content.download_url = (
|
||||
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
|
||||
@@ -73,13 +75,16 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase):
|
||||
|
||||
# VERIFY: download_url was called
|
||||
mock_requests.assert_called_once_with(
|
||||
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md", timeout=30
|
||||
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md",
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
# VERIFY: CHANGELOG was extracted successfully
|
||||
self.assertIn("changelog", scraper.extracted_data)
|
||||
self.assertIn("Bug fixes", scraper.extracted_data["changelog"])
|
||||
self.assertEqual(scraper.extracted_data["changelog"], mock_response.text)
|
||||
self.assertEqual(
|
||||
scraper.extracted_data["changelog"], mock_response.text
|
||||
)
|
||||
|
||||
def test_large_file_fallback_on_error(self):
|
||||
"""E2E: Verify graceful handling if download_url fails"""
|
||||
@@ -179,7 +184,8 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
|
||||
# VERIFY: sys.argv contains --enhance-local flag
|
||||
# (main.py should have added it before calling github_scraper)
|
||||
called_with_enhance = any(
|
||||
"--enhance-local" in str(call) for call in mock_github_main.call_args_list
|
||||
"--enhance-local" in str(call)
|
||||
for call in mock_github_main.call_args_list
|
||||
)
|
||||
self.assertTrue(
|
||||
called_with_enhance or "--enhance-local" in sys.argv,
|
||||
@@ -220,9 +226,12 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
|
||||
|
||||
with (
|
||||
patch.dict(
|
||||
os.environ, {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url}
|
||||
os.environ,
|
||||
{"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url},
|
||||
),
|
||||
patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
|
||||
patch(
|
||||
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
|
||||
) as mock_anthropic,
|
||||
):
|
||||
# Create enhancer
|
||||
_enhancer = SkillEnhancer(self.skill_dir)
|
||||
@@ -249,7 +258,9 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
|
||||
# Use ANTHROPIC_AUTH_TOKEN instead of ANTHROPIC_API_KEY
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_AUTH_TOKEN": custom_token}, clear=True),
|
||||
patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
|
||||
patch(
|
||||
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
|
||||
) as mock_anthropic,
|
||||
):
|
||||
# Create enhancer (should accept ANTHROPIC_AUTH_TOKEN)
|
||||
enhancer = SkillEnhancer(self.skill_dir)
|
||||
@@ -265,7 +276,9 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
|
||||
mock_anthropic.assert_called_once()
|
||||
call_kwargs = mock_anthropic.call_args[1]
|
||||
self.assertEqual(
|
||||
call_kwargs["api_key"], custom_token, "api_key should match ANTHROPIC_AUTH_TOKEN"
|
||||
call_kwargs["api_key"],
|
||||
custom_token,
|
||||
"api_key should match ANTHROPIC_AUTH_TOKEN",
|
||||
)
|
||||
|
||||
def test_thinking_block_handling(self):
|
||||
@@ -275,7 +288,12 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
|
||||
except ImportError:
|
||||
self.skipTest("anthropic package not installed")
|
||||
|
||||
with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}), patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic:
|
||||
with (
|
||||
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}),
|
||||
patch(
|
||||
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
|
||||
) as mock_anthropic,
|
||||
):
|
||||
enhancer = SkillEnhancer(self.skill_dir)
|
||||
|
||||
# Mock response with ThinkingBlock (newer SDK)
|
||||
@@ -283,7 +301,9 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
|
||||
mock_thinking_block = SimpleNamespace(type="thinking")
|
||||
|
||||
# TextBlock has .text attribute
|
||||
mock_text_block = SimpleNamespace(text="# Enhanced SKILL.md\n\nContent here")
|
||||
mock_text_block = SimpleNamespace(
|
||||
text="# Enhanced SKILL.md\n\nContent here"
|
||||
)
|
||||
|
||||
mock_message = Mock()
|
||||
mock_message.content = [mock_thinking_block, mock_text_block]
|
||||
|
||||
@@ -30,7 +30,12 @@ def test_timeout_with_retry():
|
||||
"""Test timeout scenario with retry logic"""
|
||||
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2)
|
||||
|
||||
with patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get, patch("time.sleep") as mock_sleep: # Mock sleep to speed up test
|
||||
with (
|
||||
patch(
|
||||
"requests.get", side_effect=requests.Timeout("Connection timeout")
|
||||
) as mock_get,
|
||||
patch("time.sleep") as mock_sleep,
|
||||
): # Mock sleep to speed up test
|
||||
content = downloader.download()
|
||||
|
||||
assert content is None
|
||||
@@ -73,7 +78,10 @@ def test_http_error_handling():
|
||||
mock_response = Mock()
|
||||
mock_response.raise_for_status.side_effect = requests.HTTPError("404 Not Found")
|
||||
|
||||
with patch("requests.get", return_value=mock_response) as mock_get, patch("time.sleep"):
|
||||
with (
|
||||
patch("requests.get", return_value=mock_response) as mock_get,
|
||||
patch("time.sleep"),
|
||||
):
|
||||
content = downloader.download()
|
||||
|
||||
assert content is None
|
||||
@@ -84,7 +92,10 @@ def test_exponential_backoff():
|
||||
"""Test that exponential backoff delays are correct"""
|
||||
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=3)
|
||||
|
||||
with patch("requests.get", side_effect=requests.Timeout("Connection timeout")), patch("time.sleep") as mock_sleep:
|
||||
with (
|
||||
patch("requests.get", side_effect=requests.Timeout("Connection timeout")),
|
||||
patch("time.sleep") as mock_sleep,
|
||||
):
|
||||
content = downloader.download()
|
||||
|
||||
assert content is None
|
||||
@@ -132,7 +143,9 @@ def test_custom_max_retries():
|
||||
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5)
|
||||
|
||||
with (
|
||||
patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get,
|
||||
patch(
|
||||
"requests.get", side_effect=requests.Timeout("Connection timeout")
|
||||
) as mock_get,
|
||||
patch("time.sleep"),
|
||||
):
|
||||
content = downloader.download()
|
||||
@@ -190,9 +203,7 @@ def test_is_markdown_rejects_html_doctype():
|
||||
"""Test that HTML with DOCTYPE is rejected (prevents redirect trap)"""
|
||||
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
|
||||
|
||||
html = (
|
||||
"<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>"
|
||||
)
|
||||
html = "<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>"
|
||||
assert not downloader._is_markdown(html)
|
||||
|
||||
# Test case-insensitive
|
||||
@@ -219,7 +230,9 @@ def test_is_markdown_rejects_html_meta():
|
||||
html_with_head = "<head><title>Page</title></head><body>Content</body>"
|
||||
assert not downloader._is_markdown(html_with_head)
|
||||
|
||||
html_with_meta = '<meta charset="utf-8"><meta name="viewport" content="width=device-width">'
|
||||
html_with_meta = (
|
||||
'<meta charset="utf-8"><meta name="viewport" content="width=device-width">'
|
||||
)
|
||||
assert not downloader._is_markdown(html_with_meta)
|
||||
|
||||
|
||||
@@ -231,7 +244,9 @@ def test_is_markdown_accepts_markdown_with_html_words():
|
||||
assert downloader._is_markdown(markdown)
|
||||
|
||||
# Test with actual markdown patterns
|
||||
markdown_with_code = "# HTML Tutorial\n\n```html\n<div>example</div>\n```\n\n## More content"
|
||||
markdown_with_code = (
|
||||
"# HTML Tutorial\n\n```html\n<div>example</div>\n```\n\n## More content"
|
||||
)
|
||||
assert downloader._is_markdown(markdown_with_code)
|
||||
|
||||
|
||||
@@ -240,7 +255,9 @@ def test_html_detection_only_scans_first_500_chars():
|
||||
downloader = LlmsTxtDownloader("https://example.com/llms.txt")
|
||||
|
||||
# HTML tag after 500 chars should not be detected
|
||||
safe_markdown = "# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n<!DOCTYPE html>"
|
||||
safe_markdown = (
|
||||
"# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n<!DOCTYPE html>"
|
||||
)
|
||||
# This should pass because <!DOCTYPE html> is beyond first 500 chars
|
||||
if len(safe_markdown[:500]) < len("<!DOCTYPE html>"):
|
||||
# If the HTML is within 500 chars, adjust test
|
||||
@@ -277,7 +294,9 @@ def test_download_rejects_html_redirect():
|
||||
|
||||
mock_response = Mock()
|
||||
# Simulate server returning HTML instead of markdown
|
||||
mock_response.text = "<!DOCTYPE html><html><body><h1>Product Page</h1></body></html>"
|
||||
mock_response.text = (
|
||||
"<!DOCTYPE html><html><body><h1>Product Page</h1></body></html>"
|
||||
)
|
||||
mock_response.raise_for_status = Mock()
|
||||
|
||||
with patch("requests.get", return_value=mock_response):
|
||||
|
||||
@@ -72,7 +72,13 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase):
|
||||
os.chdir(tmpdir)
|
||||
converter = DocToSkillConverter(config, dry_run=False)
|
||||
|
||||
with patch.object(converter, "_try_llms_txt", return_value=False) as mock_try, patch.object(converter, "scrape_page"), patch.object(converter, "save_summary"):
|
||||
with (
|
||||
patch.object(
|
||||
converter, "_try_llms_txt", return_value=False
|
||||
) as mock_try,
|
||||
patch.object(converter, "scrape_page"),
|
||||
patch.object(converter, "save_summary"),
|
||||
):
|
||||
converter.scrape_all()
|
||||
mock_try.assert_called_once()
|
||||
finally:
|
||||
@@ -93,7 +99,11 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase):
|
||||
os.chdir(tmpdir)
|
||||
converter = DocToSkillConverter(config, dry_run=False)
|
||||
|
||||
with patch.object(converter, "_try_llms_txt") as mock_try, patch.object(converter, "scrape_page"), patch.object(converter, "save_summary"):
|
||||
with (
|
||||
patch.object(converter, "_try_llms_txt") as mock_try,
|
||||
patch.object(converter, "scrape_page"),
|
||||
patch.object(converter, "save_summary"),
|
||||
):
|
||||
converter.scrape_all()
|
||||
mock_try.assert_not_called()
|
||||
finally:
|
||||
@@ -114,7 +124,10 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase):
|
||||
os.chdir(tmpdir)
|
||||
converter = DocToSkillConverter(config, dry_run=True)
|
||||
|
||||
with patch.object(converter, "_try_llms_txt") as mock_try, patch.object(converter, "save_summary"):
|
||||
with (
|
||||
patch.object(converter, "_try_llms_txt") as mock_try,
|
||||
patch.object(converter, "save_summary"),
|
||||
):
|
||||
converter.scrape_all()
|
||||
mock_try.assert_not_called()
|
||||
finally:
|
||||
@@ -140,7 +153,13 @@ class TestSkipLlmsTxtAsyncBehavior(unittest.TestCase):
|
||||
os.chdir(tmpdir)
|
||||
converter = DocToSkillConverter(config, dry_run=False)
|
||||
|
||||
with patch.object(converter, "_try_llms_txt", return_value=False) as mock_try, patch.object(converter, "scrape_page_async", return_value=None), patch.object(converter, "save_summary"):
|
||||
with (
|
||||
patch.object(
|
||||
converter, "_try_llms_txt", return_value=False
|
||||
) as mock_try,
|
||||
patch.object(converter, "scrape_page_async", return_value=None),
|
||||
patch.object(converter, "save_summary"),
|
||||
):
|
||||
converter.scrape_all()
|
||||
mock_try.assert_called_once()
|
||||
finally:
|
||||
@@ -162,7 +181,11 @@ class TestSkipLlmsTxtAsyncBehavior(unittest.TestCase):
|
||||
os.chdir(tmpdir)
|
||||
converter = DocToSkillConverter(config, dry_run=False)
|
||||
|
||||
with patch.object(converter, "_try_llms_txt") as mock_try, patch.object(converter, "scrape_page_async", return_value=None), patch.object(converter, "save_summary"):
|
||||
with (
|
||||
patch.object(converter, "_try_llms_txt") as mock_try,
|
||||
patch.object(converter, "scrape_page_async", return_value=None),
|
||||
patch.object(converter, "save_summary"),
|
||||
):
|
||||
converter.scrape_all()
|
||||
mock_try.assert_not_called()
|
||||
finally:
|
||||
@@ -179,7 +202,10 @@ class TestSkipLlmsTxtWithRealConfig(unittest.TestCase):
|
||||
"description": "Telegram bot documentation",
|
||||
"base_url": "https://core.telegram.org/bots",
|
||||
"skip_llms_txt": True, # Telegram doesn't have useful llms.txt
|
||||
"start_urls": ["https://core.telegram.org/bots", "https://core.telegram.org/bots/api"],
|
||||
"start_urls": [
|
||||
"https://core.telegram.org/bots",
|
||||
"https://core.telegram.org/bots/api",
|
||||
],
|
||||
"selectors": {
|
||||
"main_content": "#dev_page_content, main, article",
|
||||
"title": "h1, title",
|
||||
@@ -226,7 +252,9 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
|
||||
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
|
||||
converter = DocToSkillConverter(config, dry_run=True)
|
||||
self.assertFalse(converter.skip_llms_txt)
|
||||
self.assertTrue(any("Invalid value" in log and "0" in log for log in cm.output))
|
||||
self.assertTrue(
|
||||
any("Invalid value" in log and "0" in log for log in cm.output)
|
||||
)
|
||||
|
||||
def test_skip_llms_txt_with_int_one_logs_warning(self):
|
||||
"""Test that integer 1 logs warning and defaults to False."""
|
||||
@@ -240,7 +268,9 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
|
||||
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
|
||||
converter = DocToSkillConverter(config, dry_run=True)
|
||||
self.assertFalse(converter.skip_llms_txt)
|
||||
self.assertTrue(any("Invalid value" in log and "1" in log for log in cm.output))
|
||||
self.assertTrue(
|
||||
any("Invalid value" in log and "1" in log for log in cm.output)
|
||||
)
|
||||
|
||||
def test_skip_llms_txt_with_string_logs_warning(self):
|
||||
"""Test that string values log warning and default to False."""
|
||||
@@ -254,7 +284,9 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
|
||||
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
|
||||
converter = DocToSkillConverter(config, dry_run=True)
|
||||
self.assertFalse(converter.skip_llms_txt)
|
||||
self.assertTrue(any("Invalid value" in log and "true" in log for log in cm.output))
|
||||
self.assertTrue(
|
||||
any("Invalid value" in log and "true" in log for log in cm.output)
|
||||
)
|
||||
|
||||
def test_skip_llms_txt_with_none_logs_warning(self):
|
||||
"""Test that None logs warning and defaults to False."""
|
||||
@@ -268,7 +300,9 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
|
||||
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
|
||||
converter = DocToSkillConverter(config, dry_run=True)
|
||||
self.assertFalse(converter.skip_llms_txt)
|
||||
self.assertTrue(any("Invalid value" in log and "None" in log for log in cm.output))
|
||||
self.assertTrue(
|
||||
any("Invalid value" in log and "None" in log for log in cm.output)
|
||||
)
|
||||
|
||||
def test_scraping_proceeds_when_llms_txt_skipped(self):
|
||||
"""Test that HTML scraping proceeds normally when llms.txt is skipped."""
|
||||
@@ -292,7 +326,10 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
|
||||
scrape_called.append(url)
|
||||
return None
|
||||
|
||||
with patch.object(converter, "scrape_page", side_effect=mock_scrape), patch.object(converter, "save_summary"):
|
||||
with (
|
||||
patch.object(converter, "scrape_page", side_effect=mock_scrape),
|
||||
patch.object(converter, "save_summary"),
|
||||
):
|
||||
converter.scrape_all()
|
||||
# Should have attempted to scrape the base URL
|
||||
self.assertTrue(len(scrape_called) > 0)
|
||||
|
||||
Reference in New Issue
Block a user