style: Run ruff format on 15 files (CI fix)

CI runs 'ruff format', not 'black', so the files below were reformatted with ruff:

Files reformatted by ruff:
- config_extractor.py
- doc_scraper.py
- how_to_guide_builder.py
- llms_txt_parser.py
- pattern_recognizer.py
- test_example_extractor.py
- unified_codebase_analyzer.py
- test_architecture_scenarios.py
- test_async_scraping.py
- test_github_scraper.py
- test_guide_enhancer.py
- test_install_agent.py
- test_issue_219_e2e.py
- test_llms_txt_downloader.py
- test_skip_llms_txt.py

Fixes CI formatting check failure.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-18 00:01:30 +03:00
parent 9d43956b1d
commit 85c8d9d385
15 changed files with 179 additions and 510 deletions

View File

@@ -89,9 +89,7 @@ class ConfigExtractionResult:
config_files: list[ConfigFile] = field(default_factory=list) config_files: list[ConfigFile] = field(default_factory=list)
total_files: int = 0 total_files: int = 0
total_settings: int = 0 total_settings: int = 0
detected_patterns: dict[str, list[str]] = field( detected_patterns: dict[str, list[str]] = field(default_factory=dict) # pattern -> files
default_factory=dict
) # pattern -> files
errors: list[str] = field(default_factory=list) errors: list[str] = field(default_factory=list)
def to_dict(self) -> dict: def to_dict(self) -> dict:
@@ -241,9 +239,7 @@ class ConfigFileDetector:
"*.egg-info", "*.egg-info",
} }
def find_config_files( def find_config_files(self, directory: Path, max_files: int = 100) -> list[ConfigFile]:
self, directory: Path, max_files: int = 100
) -> list[ConfigFile]:
""" """
Find all configuration files in directory. Find all configuration files in directory.
@@ -314,10 +310,7 @@ class ConfigFileDetector:
filename = file_path.name.lower() filename = file_path.name.lower()
# Database configs # Database configs
if any( if any(word in path_lower for word in ["database", "db", "postgres", "mysql", "mongo"]):
word in path_lower
for word in ["database", "db", "postgres", "mysql", "mongo"]
):
return "database_configuration" return "database_configuration"
# API configs # API configs
@@ -333,9 +326,7 @@ class ConfigFileDetector:
return "docker_configuration" return "docker_configuration"
# CI/CD configs # CI/CD configs
if any( if any(word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"]):
word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"]
):
return "ci_cd_configuration" return "ci_cd_configuration"
# Package configs # Package configs
@@ -347,11 +338,7 @@ class ConfigFileDetector:
return "typescript_configuration" return "typescript_configuration"
# Framework configs # Framework configs
if ( if "next.config" in filename or "vue.config" in filename or "webpack.config" in filename:
"next.config" in filename
or "vue.config" in filename
or "webpack.config" in filename
):
return "framework_configuration" return "framework_configuration"
# Environment configs # Environment configs
@@ -531,9 +518,7 @@ class ConfigParser:
for match in re.finditer(pattern, config_file.raw_content): for match in re.finditer(pattern, config_file.raw_content):
if len(match.groups()) >= 2: if len(match.groups()) >= 2:
key = match.group(1) key = match.group(1)
value = ( value = match.group(3) if len(match.groups()) > 2 else match.group(2)
match.group(3) if len(match.groups()) > 2 else match.group(2)
)
setting = ConfigSetting( setting = ConfigSetting(
key=key, value=value, value_type=self._infer_type(value) key=key, value=value, value_type=self._infer_type(value)
@@ -579,9 +564,7 @@ class ConfigParser:
for key, value in data.items(): for key, value in data.items():
if isinstance(value, dict): if isinstance(value, dict):
# Recurse into nested dicts # Recurse into nested dicts
self._extract_settings_from_dict( self._extract_settings_from_dict(value, config_file, parent_path + [key])
value, config_file, parent_path + [key]
)
else: else:
setting = ConfigSetting( setting = ConfigSetting(
key=".".join(parent_path + [key]) if parent_path else key, key=".".join(parent_path + [key]) if parent_path else key,
@@ -872,9 +855,7 @@ def main():
print("\n📊 Summary:") print("\n📊 Summary:")
print(f" Config files found: {result.total_files}") print(f" Config files found: {result.total_files}")
print(f" Total settings: {result.total_settings}") print(f" Total settings: {result.total_settings}")
print( print(f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}")
f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}"
)
if "ai_enhancements" in output_dict: if "ai_enhancements" in output_dict:
print(f" ✨ AI enhancements: Yes ({enhance_mode} mode)") print(f" ✨ AI enhancements: Yes ({enhance_mode} mode)")

View File

@@ -148,9 +148,7 @@ def infer_description_from_docs(
class DocToSkillConverter: class DocToSkillConverter:
def __init__( def __init__(self, config: dict[str, Any], dry_run: bool = False, resume: bool = False) -> None:
self, config: dict[str, Any], dry_run: bool = False, resume: bool = False
) -> None:
self.config = config self.config = config
self.name = config["name"] self.name = config["name"]
self.base_url = config["base_url"] self.base_url = config["base_url"]
@@ -165,9 +163,7 @@ class DocToSkillConverter:
# Checkpoint config # Checkpoint config
checkpoint_config = config.get("checkpoint", {}) checkpoint_config = config.get("checkpoint", {})
self.checkpoint_enabled = checkpoint_config.get("enabled", False) self.checkpoint_enabled = checkpoint_config.get("enabled", False)
self.checkpoint_interval = checkpoint_config.get( self.checkpoint_interval = checkpoint_config.get("interval", DEFAULT_CHECKPOINT_INTERVAL)
"interval", DEFAULT_CHECKPOINT_INTERVAL
)
# llms.txt detection state # llms.txt detection state
skip_llms_txt_value = config.get("skip_llms_txt", False) skip_llms_txt_value = config.get("skip_llms_txt", False)
@@ -322,9 +318,7 @@ class DocToSkillConverter:
for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]): for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
text = self.clean_text(h.get_text()) text = self.clean_text(h.get_text())
if text: if text:
page["headings"].append( page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")})
{"level": h.name, "text": text, "id": h.get("id", "")}
)
# Extract code with language detection # Extract code with language detection
code_selector = selectors.get("code_blocks", "pre code") code_selector = selectors.get("code_blocks", "pre code")
@@ -391,9 +385,7 @@ class DocToSkillConverter:
import re import re
# Detect if content is actually HTML (some .md URLs return HTML) # Detect if content is actually HTML (some .md URLs return HTML)
if content.strip().startswith("<!DOCTYPE") or content.strip().startswith( if content.strip().startswith("<!DOCTYPE") or content.strip().startswith("<html"):
"<html"
):
return self._extract_html_as_markdown(content, url) return self._extract_html_as_markdown(content, url)
page = { page = {
@@ -432,9 +424,7 @@ class DocToSkillConverter:
code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL) code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
for lang, code in code_blocks: for lang, code in code_blocks:
if len(code.strip()) > 10: if len(code.strip()) > 10:
page["code_samples"].append( page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"})
{"code": code.strip(), "language": lang or "unknown"}
)
# Extract content (paragraphs) # Extract content (paragraphs)
content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL) content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
@@ -458,11 +448,7 @@ class DocToSkillConverter:
# Strip anchor fragments # Strip anchor fragments
full_url = full_url.split("#")[0] full_url = full_url.split("#")[0]
# Only include .md URLs to avoid client-side rendered HTML pages # Only include .md URLs to avoid client-side rendered HTML pages
if ( if ".md" in full_url and self.is_valid_url(full_url) and full_url not in page["links"]:
".md" in full_url
and self.is_valid_url(full_url)
and full_url not in page["links"]
):
page["links"].append(full_url) page["links"].append(full_url)
return page return page
@@ -526,18 +512,14 @@ class DocToSkillConverter:
for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]): for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
text = self.clean_text(h.get_text()) text = self.clean_text(h.get_text())
if text: if text:
page["headings"].append( page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")})
{"level": h.name, "text": text, "id": h.get("id", "")}
)
# Extract code blocks # Extract code blocks
for code_elem in main.select("pre code, pre"): for code_elem in main.select("pre code, pre"):
code = code_elem.get_text() code = code_elem.get_text()
if len(code.strip()) > 10: if len(code.strip()) > 10:
lang = self.detect_language(code_elem, code) lang = self.detect_language(code_elem, code)
page["code_samples"].append( page["code_samples"].append({"code": code.strip(), "language": lang})
{"code": code.strip(), "language": lang}
)
# Extract paragraphs # Extract paragraphs
paragraphs = [] paragraphs = []
@@ -558,9 +540,7 @@ class DocToSkillConverter:
# Log low-confidence detections for debugging # Log low-confidence detections for debugging
if confidence < 0.5: if confidence < 0.5:
logger.debug( logger.debug(f"Low confidence language detection: {lang} ({confidence:.2f})")
f"Low confidence language detection: {lang} ({confidence:.2f})"
)
return lang # Return string for backward compatibility return lang # Return string for backward compatibility
@@ -573,10 +553,7 @@ class DocToSkillConverter:
# Look for "Example:" or "Pattern:" sections # Look for "Example:" or "Pattern:" sections
for elem in main.find_all(["p", "div"]): for elem in main.find_all(["p", "div"]):
text = elem.get_text().lower() text = elem.get_text().lower()
if any( if any(word in text for word in ["example:", "pattern:", "usage:", "typical use"]):
word in text
for word in ["example:", "pattern:", "usage:", "typical use"]
):
# Get the code that follows # Get the code that follows
next_code = elem.find_next(["pre", "code"]) next_code = elem.find_next(["pre", "code"])
if next_code: if next_code:
@@ -598,9 +575,7 @@ class DocToSkillConverter:
"""Save page data (skip pages with empty content)""" """Save page data (skip pages with empty content)"""
# Skip pages with empty or very short content # Skip pages with empty or very short content
if not page.get("content") or len(page.get("content", "")) < 50: if not page.get("content") or len(page.get("content", "")) < 50:
logger.debug( logger.debug("Skipping page with empty/short content: %s", page.get("url", "unknown"))
"Skipping page with empty/short content: %s", page.get("url", "unknown")
)
return return
url_hash = hashlib.md5(page["url"].encode()).hexdigest()[:10] url_hash = hashlib.md5(page["url"].encode()).hexdigest()[:10]
@@ -648,10 +623,7 @@ class DocToSkillConverter:
# Add new URLs # Add new URLs
for link in page["links"]: for link in page["links"]:
if ( if link not in self.visited_urls and link not in self.pending_urls:
link not in self.visited_urls
and link not in self.pending_urls
):
self.pending_urls.append(link) self.pending_urls.append(link)
else: else:
# Single-threaded mode (no lock needed) # Single-threaded mode (no lock needed)
@@ -672,9 +644,7 @@ class DocToSkillConverter:
except Exception as e: except Exception as e:
if self.workers > 1: if self.workers > 1:
with self.lock: with self.lock:
logger.error( logger.error(" ✗ Error scraping %s: %s: %s", url, type(e).__name__, e)
" ✗ Error scraping %s: %s: %s", url, type(e).__name__, e
)
else: else:
logger.error(" ✗ Error scraping page: %s: %s", type(e).__name__, e) logger.error(" ✗ Error scraping page: %s: %s", type(e).__name__, e)
logger.error(" URL: %s", url) logger.error(" URL: %s", url)
@@ -792,9 +762,7 @@ class DocToSkillConverter:
# Check for explicit config URL first # Check for explicit config URL first
explicit_url = self.config.get("llms_txt_url") explicit_url = self.config.get("llms_txt_url")
if explicit_url: if explicit_url:
logger.info( logger.info("\n📌 Using explicit llms_txt_url from config: %s", explicit_url)
"\n📌 Using explicit llms_txt_url from config: %s", explicit_url
)
# Download explicit file first # Download explicit file first
downloader = LlmsTxtDownloader(explicit_url) downloader = LlmsTxtDownloader(explicit_url)
@@ -915,9 +883,7 @@ class DocToSkillConverter:
logger.info("%s (%d chars)", filename, len(content)) logger.info("%s (%d chars)", filename, len(content))
if not downloaded: if not downloaded:
logger.warning( logger.warning("⚠️ Failed to download any variants, falling back to HTML scraping")
"⚠️ Failed to download any variants, falling back to HTML scraping"
)
return False return False
# Save ALL variants to references/ # Save ALL variants to references/
@@ -1032,9 +998,7 @@ class DocToSkillConverter:
# Single-threaded mode (original sequential logic) # Single-threaded mode (original sequential logic)
if self.workers <= 1: if self.workers <= 1:
while self.pending_urls and ( while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
unlimited or len(self.visited_urls) < preview_limit
):
url = self.pending_urls.popleft() url = self.pending_urls.popleft()
if url in self.visited_urls: if url in self.visited_urls:
@@ -1046,9 +1010,7 @@ class DocToSkillConverter:
# Just show what would be scraped # Just show what would be scraped
logger.info(" [Preview] %s", url) logger.info(" [Preview] %s", url)
try: try:
headers = { headers = {"User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)"}
"User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)"
}
response = requests.get(url, headers=headers, timeout=10) response = requests.get(url, headers=headers, timeout=10)
soup = BeautifulSoup(response.content, "html.parser") soup = BeautifulSoup(response.content, "html.parser")
@@ -1060,16 +1022,11 @@ class DocToSkillConverter:
if main: if main:
for link in main.find_all("a", href=True): for link in main.find_all("a", href=True):
href = urljoin(url, link["href"]) href = urljoin(url, link["href"])
if ( if self.is_valid_url(href) and href not in self.visited_urls:
self.is_valid_url(href)
and href not in self.visited_urls
):
self.pending_urls.append(href) self.pending_urls.append(href)
except Exception as e: except Exception as e:
# Failed to extract links in fast mode, continue anyway # Failed to extract links in fast mode, continue anyway
logger.warning( logger.warning("⚠️ Warning: Could not extract links from %s: %s", url, e)
"⚠️ Warning: Could not extract links from %s: %s", url, e
)
else: else:
self.scrape_page(url) self.scrape_page(url)
self.pages_scraped += 1 self.pages_scraped += 1
@@ -1092,9 +1049,7 @@ class DocToSkillConverter:
with ThreadPoolExecutor(max_workers=self.workers) as executor: with ThreadPoolExecutor(max_workers=self.workers) as executor:
futures = [] futures = []
while self.pending_urls and ( while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
unlimited or len(self.visited_urls) < preview_limit
):
# Get next batch of URLs (thread-safe) # Get next batch of URLs (thread-safe)
batch = [] batch = []
batch_size = min(self.workers * 2, len(self.pending_urls)) batch_size = min(self.workers * 2, len(self.pending_urls))
@@ -1152,9 +1107,7 @@ class DocToSkillConverter:
self.pages_scraped += 1 self.pages_scraped += 1
if self.dry_run: if self.dry_run:
logger.info( logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
"\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls)
)
if len(self.visited_urls) >= preview_limit: if len(self.visited_urls) >= preview_limit:
logger.info( logger.info(
" (showing first %d, actual scraping may find more)", " (showing first %d, actual scraping may find more)",
@@ -1221,9 +1174,7 @@ class DocToSkillConverter:
) as client: ) as client:
tasks = [] tasks = []
while self.pending_urls and ( while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit):
unlimited or len(self.visited_urls) < preview_limit
):
# Get next batch of URLs # Get next batch of URLs
batch = [] batch = []
batch_size = min(self.workers * 2, len(self.pending_urls)) batch_size = min(self.workers * 2, len(self.pending_urls))
@@ -1271,9 +1222,7 @@ class DocToSkillConverter:
await asyncio.gather(*tasks, return_exceptions=True) await asyncio.gather(*tasks, return_exceptions=True)
if self.dry_run: if self.dry_run:
logger.info( logger.info("\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls))
"\n✅ Dry run complete: would scrape ~%d pages", len(self.visited_urls)
)
if len(self.visited_urls) >= preview_limit: if len(self.visited_urls) >= preview_limit:
logger.info( logger.info(
" (showing first %d, actual scraping may find more)", " (showing first %d, actual scraping may find more)",
@@ -1323,9 +1272,7 @@ class DocToSkillConverter:
return pages return pages
def smart_categorize( def smart_categorize(self, pages: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
self, pages: list[dict[str, Any]]
) -> dict[str, list[dict[str, Any]]]:
"""Improved categorization with better pattern matching""" """Improved categorization with better pattern matching"""
category_defs = self.config.get("categories", {}) category_defs = self.config.get("categories", {})
@@ -1377,18 +1324,14 @@ class DocToSkillConverter:
for page in pages: for page in pages:
path = urlparse(page["url"]).path path = urlparse(page["url"]).path
segments = [ segments = [
s s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"]
for s in path.split("/")
if s and s not in ["en", "stable", "latest", "docs"]
] ]
for seg in segments: for seg in segments:
url_segments[seg] += 1 url_segments[seg] += 1
# Top segments become categories # Top segments become categories
top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[ top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[:8]
:8
]
categories = {} categories = {}
for seg, count in top_segments: for seg, count in top_segments:
@@ -1408,9 +1351,7 @@ class DocToSkillConverter:
return categories return categories
def generate_quick_reference( def generate_quick_reference(self, pages: list[dict[str, Any]]) -> list[dict[str, str]]:
self, pages: list[dict[str, Any]]
) -> list[dict[str, str]]:
"""Generate quick reference from common patterns (NEW FEATURE)""" """Generate quick reference from common patterns (NEW FEATURE)"""
quick_ref = [] quick_ref = []
@@ -1492,9 +1433,7 @@ class DocToSkillConverter:
if pages: if pages:
first_page_html = pages[0].get("raw_html", "") first_page_html = pages[0].get("raw_html", "")
break break
description = infer_description_from_docs( description = infer_description_from_docs(self.base_url, first_page_html, self.name)
self.base_url, first_page_html, self.name
)
else: else:
description = self.config["description"] description = self.config["description"]
@@ -1502,9 +1441,7 @@ class DocToSkillConverter:
example_codes = [] example_codes = []
for pages in categories.values(): for pages in categories.values():
for page in pages[:3]: # First 3 pages per category for page in pages[:3]: # First 3 pages per category
for sample in page.get("code_samples", [])[ for sample in page.get("code_samples", [])[:2]: # First 2 samples per page
:2
]: # First 2 samples per page
code = sample.get("code", sample if isinstance(sample, str) else "") code = sample.get("code", sample if isinstance(sample, str) else "")
lang = sample.get("language", "unknown") lang = sample.get("language", "unknown")
if len(code) < 200 and lang != "unknown": if len(code) < 200 and lang != "unknown":
@@ -1554,9 +1491,7 @@ This skill should be triggered when:
content += pattern.get("code", "")[:300] content += pattern.get("code", "")[:300]
content += "\n```\n\n" content += "\n```\n\n"
else: else:
content += ( content += "*Quick reference patterns will be added as you use the skill.*\n\n"
"*Quick reference patterns will be added as you use the skill.*\n\n"
)
# Add example codes from docs # Add example codes from docs
if example_codes: if example_codes:
@@ -1571,9 +1506,7 @@ This skill includes comprehensive documentation in `references/`:
""" """
for cat in sorted(categories.keys()): for cat in sorted(categories.keys()):
content += ( content += f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n"
f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n"
)
content += """ content += """
Use `view` to read specific reference files when detailed information is needed. Use `view` to read specific reference files when detailed information is needed.
@@ -1721,9 +1654,7 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]:
) )
# Validate base_url # Validate base_url
if "base_url" in config and not config["base_url"].startswith( if "base_url" in config and not config["base_url"].startswith(("http://", "https://")):
("http://", "https://")
):
errors.append( errors.append(
f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)" f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)"
) )
@@ -1840,18 +1771,12 @@ def load_config(config_path: str) -> dict[str, Any]:
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
logger.error("❌ Error: Invalid JSON in config file: %s", config_path) logger.error("❌ Error: Invalid JSON in config file: %s", config_path)
logger.error(" Details: %s", e) logger.error(" Details: %s", e)
logger.error( logger.error(" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno)
" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno
)
sys.exit(1) sys.exit(1)
except FileNotFoundError: except FileNotFoundError:
logger.error("❌ Error: Config file not found: %s", config_path) logger.error("❌ Error: Config file not found: %s", config_path)
logger.error( logger.error(" Suggestion: Create a config file or use an existing one from configs/")
" Suggestion: Create a config file or use an existing one from configs/" logger.error(" Available configs: react.json, vue.json, django.json, godot.json")
)
logger.error(
" Available configs: react.json, vue.json, django.json, godot.json"
)
sys.exit(1) sys.exit(1)
# Validate config # Validate config
@@ -1869,9 +1794,7 @@ def load_config(config_path: str) -> dict[str, Any]:
logger.error("❌ Configuration validation errors in %s:", config_path) logger.error("❌ Configuration validation errors in %s:", config_path)
for error in errors: for error in errors:
logger.error(" - %s", error) logger.error(" - %s", error)
logger.error( logger.error("\n Suggestion: Fix the above errors or check configs/ for working examples")
"\n Suggestion: Fix the above errors or check configs/ for working examples"
)
sys.exit(1) sys.exit(1)
return config return config
@@ -2025,9 +1948,7 @@ def setup_argument_parser() -> argparse.ArgumentParser:
action="store_true", action="store_true",
help="Resume from last checkpoint (for interrupted scrapes)", help="Resume from last checkpoint (for interrupted scrapes)",
) )
parser.add_argument( parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh")
"--fresh", action="store_true", help="Clear checkpoint and start fresh"
)
parser.add_argument( parser.add_argument(
"--rate-limit", "--rate-limit",
"-r", "-r",
@@ -2126,15 +2047,11 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
if args.workers: if args.workers:
# Validate workers count # Validate workers count
if args.workers < 1: if args.workers < 1:
logger.error( logger.error("❌ Error: --workers must be at least 1 (got %d)", args.workers)
"❌ Error: --workers must be at least 1 (got %d)", args.workers
)
logger.error(" Suggestion: Use --workers 1 (default) or omit the flag") logger.error(" Suggestion: Use --workers 1 (default) or omit the flag")
sys.exit(1) sys.exit(1)
if args.workers > 10: if args.workers > 10:
logger.warning( logger.warning("⚠️ Warning: --workers capped at 10 (requested %d)", args.workers)
"⚠️ Warning: --workers capped at 10 (requested %d)", args.workers
)
args.workers = 10 args.workers = 10
config["workers"] = args.workers config["workers"] = args.workers
if args.workers > 1: if args.workers > 1:
@@ -2336,9 +2253,7 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non
# Suggest enhancement if not done # Suggest enhancement if not done
if not args.enhance and not args.enhance_local: if not args.enhance and not args.enhance_local:
logger.info("\n💡 Optional: Enhance SKILL.md with Claude:") logger.info("\n💡 Optional: Enhance SKILL.md with Claude:")
logger.info( logger.info(" Local (recommended): skill-seekers-enhance output/%s/", config["name"])
" Local (recommended): skill-seekers-enhance output/%s/", config["name"]
)
logger.info(" or re-run with: --enhance-local") logger.info(" or re-run with: --enhance-local")
logger.info( logger.info(
" API-based: skill-seekers-enhance-api output/%s/", " API-based: skill-seekers-enhance-api output/%s/",

View File

@@ -79,9 +79,7 @@ class WorkflowStep:
setup_required: str | None = None setup_required: str | None = None
explanation: str | None = None # Why this step matters explanation: str | None = None # Why this step matters
common_pitfall: str | None = None # Warning for this step common_pitfall: str | None = None # Warning for this step
common_variations: list[str] = field( common_variations: list[str] = field(default_factory=list) # AI: Alternative approaches
default_factory=list
) # AI: Alternative approaches
@dataclass @dataclass
@@ -223,9 +221,7 @@ class WorkflowAnalyzer:
# Check if next statement is assertion (verification) # Check if next statement is assertion (verification)
idx = statements.index(stmt) idx = statements.index(stmt)
verification = None verification = None
if idx + 1 < len(statements) and isinstance( if idx + 1 < len(statements) and isinstance(statements[idx + 1], ast.Assert):
statements[idx + 1], ast.Assert
):
verification = ast.get_source_segment(code, statements[idx + 1]) verification = ast.get_source_segment(code, statements[idx + 1])
steps.append( steps.append(
@@ -244,9 +240,7 @@ class WorkflowAnalyzer:
return steps return steps
def _extract_steps_heuristic( def _extract_steps_heuristic(self, code: str, _workflow: dict) -> list[WorkflowStep]:
self, code: str, _workflow: dict
) -> list[WorkflowStep]:
"""Extract steps using heuristics (for non-Python or invalid syntax)""" """Extract steps using heuristics (for non-Python or invalid syntax)"""
steps = [] steps = []
lines = code.split("\n") lines = code.split("\n")
@@ -282,9 +276,7 @@ class WorkflowAnalyzer:
step_code = "\n".join(current_step) step_code = "\n".join(current_step)
description = self._infer_description_from_code(step_code) description = self._infer_description_from_code(step_code)
steps.append( steps.append(
WorkflowStep( WorkflowStep(step_number=step_num, code=step_code, description=description)
step_number=step_num, code=step_code, description=description
)
) )
return steps return steps
@@ -454,9 +446,7 @@ class WorkflowGrouper:
groups = self._group_by_file_path(workflows) groups = self._group_by_file_path(workflows)
return groups return groups
def _group_by_ai_tutorial_group( def _group_by_ai_tutorial_group(self, workflows: list[dict]) -> dict[str, list[dict]]:
self, workflows: list[dict]
) -> dict[str, list[dict]]:
"""Group by AI-generated tutorial_group (from C3.6 enhancement)""" """Group by AI-generated tutorial_group (from C3.6 enhancement)"""
groups = defaultdict(list) groups = defaultdict(list)
ungrouped = [] ungrouped = []
@@ -914,9 +904,7 @@ class HowToGuideBuilder:
"""Filter to workflow category only""" """Filter to workflow category only"""
return [ex for ex in examples if ex.get("category") == "workflow"] return [ex for ex in examples if ex.get("category") == "workflow"]
def _create_guide( def _create_guide(self, title: str, workflows: list[dict], enhancer=None) -> HowToGuide:
self, title: str, workflows: list[dict], enhancer=None
) -> HowToGuide:
""" """
Generate single guide from workflow(s). Generate single guide from workflow(s).
@@ -974,18 +962,14 @@ class HowToGuideBuilder:
# Add AI enhancements if enhancer is available # Add AI enhancements if enhancer is available
if enhancer: if enhancer:
self._enhance_guide_with_ai( self._enhance_guide_with_ai(guide, primary_workflow.get("ai_analysis", {}), enhancer)
guide, primary_workflow.get("ai_analysis", {}), enhancer
)
elif self.enhance_with_ai and primary_workflow.get("ai_analysis"): elif self.enhance_with_ai and primary_workflow.get("ai_analysis"):
# Fallback to old enhancement method (basic) # Fallback to old enhancement method (basic)
self._enhance_guide_with_ai_basic(guide, primary_workflow["ai_analysis"]) self._enhance_guide_with_ai_basic(guide, primary_workflow["ai_analysis"])
return guide return guide
def _generate_overview( def _generate_overview(self, primary_workflow: dict, _all_workflows: list[dict]) -> str:
self, primary_workflow: dict, _all_workflows: list[dict]
) -> str:
"""Generate guide overview""" """Generate guide overview"""
# Try to get explanation from AI analysis # Try to get explanation from AI analysis
if primary_workflow.get("ai_analysis"): if primary_workflow.get("ai_analysis"):
@@ -1019,10 +1003,7 @@ class HowToGuideBuilder:
# Prepare guide data for enhancer # Prepare guide data for enhancer
guide_data = { guide_data = {
"title": guide.title, "title": guide.title,
"steps": [ "steps": [{"description": step.description, "code": step.code} for step in guide.steps],
{"description": step.description, "code": step.code}
for step in guide.steps
],
"language": "python", # TODO: Detect from code "language": "python", # TODO: Detect from code
"prerequisites": guide.prerequisites, "prerequisites": guide.prerequisites,
"description": guide.overview, "description": guide.overview,
@@ -1055,9 +1036,7 @@ class HowToGuideBuilder:
if "use_cases" in enhanced_data: if "use_cases" in enhanced_data:
guide.use_cases = enhanced_data["use_cases"] guide.use_cases = enhanced_data["use_cases"]
logger.info( logger.info(f"✨ Enhanced guide '{guide.title}' with comprehensive AI improvements")
f"✨ Enhanced guide '{guide.title}' with comprehensive AI improvements"
)
def _enhance_guide_with_ai_basic(self, guide: HowToGuide, ai_analysis: dict): def _enhance_guide_with_ai_basic(self, guide: HowToGuide, ai_analysis: dict):
""" """
@@ -1122,9 +1101,7 @@ class HowToGuideBuilder:
for guide in guides: for guide in guides:
# Generate filename from title # Generate filename from title
filename = ( filename = guide.title.lower().replace(" ", "-").replace(":", "") + ".md"
guide.title.lower().replace(" ", "-").replace(":", "") + ".md"
)
file_path = use_case_dir / filename file_path = use_case_dir / filename
# Generate and save markdown # Generate and save markdown
@@ -1135,9 +1112,7 @@ class HowToGuideBuilder:
index_markdown = self.generator.generate_index(collection.guides) index_markdown = self.generator.generate_index(collection.guides)
(output_dir / "index.md").write_text(index_markdown, encoding="utf-8") (output_dir / "index.md").write_text(index_markdown, encoding="utf-8")
logger.info( logger.info(f"✅ Saved {collection.total_guides} guides + index to {output_dir}")
f"✅ Saved {collection.total_guides} guides + index to {output_dir}"
)
# ============================================================================ # ============================================================================
@@ -1244,9 +1219,7 @@ Grouping Strategies:
# Extract from directory using test example extractor # Extract from directory using test example extractor
print("⚠️ Directory input requires test example extractor") print("⚠️ Directory input requires test example extractor")
print(" Please use test_examples.json output from C3.2") print(" Please use test_examples.json output from C3.2")
print( print(f" Or run: skill-seekers extract-test-examples {input_path} --json > examples.json")
f" Or run: skill-seekers extract-test-examples {input_path} --json > examples.json"
)
sys.exit(1) sys.exit(1)
else: else:

View File

@@ -127,9 +127,7 @@ class LlmsTxtParser:
# Extract code blocks # Extract code blocks
code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL) code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL)
for lang, code in code_blocks: for lang, code in code_blocks:
page["code_samples"].append( page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"})
{"code": code.strip(), "language": lang or "unknown"}
)
# Extract h2/h3 headings # Extract h2/h3 headings
headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE) headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE)
@@ -146,9 +144,7 @@ class LlmsTxtParser:
content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL) content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
# Extract paragraphs # Extract paragraphs
paragraphs = [ paragraphs = [p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20]
p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20
]
page["content"] = "\n\n".join(paragraphs) page["content"] = "\n\n".join(paragraphs)
return page return page

View File

@@ -237,9 +237,7 @@ class PatternRecognizer:
self.detectors.append(TemplateMethodDetector(self.depth)) self.detectors.append(TemplateMethodDetector(self.depth))
self.detectors.append(ChainOfResponsibilityDetector(self.depth)) self.detectors.append(ChainOfResponsibilityDetector(self.depth))
def analyze_file( def analyze_file(self, file_path: str, content: str, language: str) -> PatternReport:
self, file_path: str, content: str, language: str
) -> PatternReport:
""" """
Analyze a single file for design patterns. Analyze a single file for design patterns.
@@ -581,9 +579,7 @@ class FactoryDetector(BasePatternDetector):
# Check if multiple factory methods exist (Abstract Factory pattern) # Check if multiple factory methods exist (Abstract Factory pattern)
if len(factory_methods) >= 2: if len(factory_methods) >= 2:
evidence.append( evidence.append(f"Multiple factory methods: {', '.join(factory_methods[:3])}")
f"Multiple factory methods: {', '.join(factory_methods[:3])}"
)
confidence += 0.2 confidence += 0.2
# Check for inheritance (factory hierarchy) # Check for inheritance (factory hierarchy)
@@ -800,35 +796,25 @@ class StrategyDetector(BasePatternDetector):
] ]
if siblings: if siblings:
evidence.append( evidence.append(f"Part of strategy family with: {', '.join(siblings[:3])}")
f"Part of strategy family with: {', '.join(siblings[:3])}"
)
confidence += 0.5 confidence += 0.5
if base_class and ( if base_class and ("strategy" in base_class.lower() or "policy" in base_class.lower()):
"strategy" in base_class.lower() or "policy" in base_class.lower()
):
evidence.append(f"Inherits from strategy base: {base_class}") evidence.append(f"Inherits from strategy base: {base_class}")
confidence += 0.3 confidence += 0.3
# Check if this is a strategy base class # Check if this is a strategy base class
# (has subclasses in same file) # (has subclasses in same file)
subclasses = [ subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes]
cls.name for cls in all_classes if class_sig.name in cls.base_classes
]
if len(subclasses) >= 2: if len(subclasses) >= 2:
evidence.append( evidence.append(f"Strategy base with implementations: {', '.join(subclasses[:3])}")
f"Strategy base with implementations: {', '.join(subclasses[:3])}"
)
confidence += 0.6 confidence += 0.6
# Check for single dominant method (strategy interface) # Check for single dominant method (strategy interface)
if len(class_sig.methods) == 1 or len(class_sig.methods) == 2: if len(class_sig.methods) == 1 or len(class_sig.methods) == 2:
# Single method or method + __init__ # Single method or method + __init__
main_method = [ main_method = [m for m in class_sig.methods if m.name not in ["__init__", "__new__"]]
m for m in class_sig.methods if m.name not in ["__init__", "__new__"]
]
if main_method: if main_method:
evidence.append(f"Strategy interface method: {main_method[0].name}") evidence.append(f"Strategy interface method: {main_method[0].name}")
confidence += 0.2 confidence += 0.2
@@ -939,8 +925,7 @@ class DecoratorDetector(BasePatternDetector):
if init_method and len(init_method.parameters) > 1: # More than just 'self' if init_method and len(init_method.parameters) > 1: # More than just 'self'
param_names = [p.name for p in init_method.parameters if p.name != "self"] param_names = [p.name for p in init_method.parameters if p.name != "self"]
if any( if any(
name in ["wrapped", "component", "inner", "obj", "target"] name in ["wrapped", "component", "inner", "obj", "target"] for name in param_names
for name in param_names
): ):
evidence.append(f"Takes wrapped object in constructor: {param_names}") evidence.append(f"Takes wrapped object in constructor: {param_names}")
confidence += 0.4 confidence += 0.4
@@ -1298,9 +1283,7 @@ class TemplateMethodDetector(BasePatternDetector):
class_lower = class_sig.name.lower() class_lower = class_sig.name.lower()
if any(keyword in class_lower for keyword in template_keywords): if any(keyword in class_lower for keyword in template_keywords):
# Check if has subclasses # Check if has subclasses
subclasses = [ subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes]
cls.name for cls in all_classes if class_sig.name in cls.base_classes
]
if subclasses: if subclasses:
return PatternInstance( return PatternInstance(
@@ -1310,9 +1293,7 @@ class TemplateMethodDetector(BasePatternDetector):
location="", location="",
class_name=class_sig.name, class_name=class_sig.name,
line_number=class_sig.line_number, line_number=class_sig.line_number,
evidence=[ evidence=[f"Abstract base with subclasses: {', '.join(subclasses[:2])}"],
f"Abstract base with subclasses: {', '.join(subclasses[:2])}"
],
related_classes=subclasses, related_classes=subclasses,
) )
@@ -1329,9 +1310,7 @@ class TemplateMethodDetector(BasePatternDetector):
# 3. Has template method that orchestrates # 3. Has template method that orchestrates
# Check for subclasses # Check for subclasses
subclasses = [ subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes]
cls.name for cls in all_classes if class_sig.name in cls.base_classes
]
if len(subclasses) >= 1: if len(subclasses) >= 1:
evidence.append(f"Base class with {len(subclasses)} implementations") evidence.append(f"Base class with {len(subclasses)} implementations")
@@ -1467,8 +1446,7 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
# Check for set_next() method # Check for set_next() method
has_set_next = any( has_set_next = any(
"next" in m.name.lower() "next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower())
and ("set" in m.name.lower() or "add" in m.name.lower())
for m in class_sig.methods for m in class_sig.methods
) )
@@ -1489,9 +1467,7 @@ class ChainOfResponsibilityDetector(BasePatternDetector):
] ]
if siblings and has_next_ref: if siblings and has_next_ref:
evidence.append( evidence.append(f"Part of handler chain with: {', '.join(siblings[:2])}")
f"Part of handler chain with: {', '.join(siblings[:2])}"
)
confidence += 0.2 confidence += 0.2
if confidence >= 0.5: if confidence >= 0.5:
@@ -1590,9 +1566,7 @@ class LanguageAdapter:
pattern.evidence.append("Abstract Factory pattern") pattern.evidence.append("Abstract Factory pattern")
# Template Method: Abstract classes common # Template Method: Abstract classes common
elif ( elif pattern.pattern_type == "TemplateMethod" and "abstract" in evidence_str:
pattern.pattern_type == "TemplateMethod" and "abstract" in evidence_str
):
pattern.confidence = min(pattern.confidence + 0.1, 1.0) pattern.confidence = min(pattern.confidence + 0.1, 1.0)
# Go adaptations # Go adaptations
@@ -1645,9 +1619,7 @@ class LanguageAdapter:
pattern.evidence.append("Ruby Singleton module") pattern.evidence.append("Ruby Singleton module")
# Builder: Method chaining is idiomatic # Builder: Method chaining is idiomatic
elif ( elif pattern.pattern_type == "Builder" and "method chaining" in evidence_str:
pattern.pattern_type == "Builder" and "method chaining" in evidence_str
):
pattern.confidence = min(pattern.confidence + 0.05, 1.0) pattern.confidence = min(pattern.confidence + 0.05, 1.0)
# PHP adaptations # PHP adaptations
@@ -1702,9 +1674,7 @@ Supported Languages:
action="append", action="append",
help="Source file to analyze (can be specified multiple times)", help="Source file to analyze (can be specified multiple times)",
) )
parser.add_argument( parser.add_argument("--directory", help="Directory to analyze (analyzes all source files)")
"--directory", help="Directory to analyze (analyzes all source files)"
)
parser.add_argument( parser.add_argument(
"--output", help="Output directory for results (default: current directory)" "--output", help="Output directory for results (default: current directory)"
) )

View File

@@ -194,15 +194,11 @@ class PythonTestAnalyzer:
for node in ast.walk(tree): for node in ast.walk(tree):
if isinstance(node, ast.ClassDef): if isinstance(node, ast.ClassDef):
if self._is_test_class(node): if self._is_test_class(node):
examples.extend( examples.extend(self._extract_from_test_class(node, file_path, imports))
self._extract_from_test_class(node, file_path, imports)
)
# Find test functions (pytest) # Find test functions (pytest)
elif isinstance(node, ast.FunctionDef) and self._is_test_function(node): elif isinstance(node, ast.FunctionDef) and self._is_test_function(node):
examples.extend( examples.extend(self._extract_from_test_function(node, file_path, imports))
self._extract_from_test_function(node, file_path, imports)
)
return examples return examples
@@ -236,9 +232,7 @@ class PythonTestAnalyzer:
return True return True
# Has @pytest.mark decorator # Has @pytest.mark decorator
for decorator in node.decorator_list: for decorator in node.decorator_list:
if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse( if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse(decorator):
decorator
):
return True return True
return False return False
@@ -255,9 +249,7 @@ class PythonTestAnalyzer:
for node in class_node.body: for node in class_node.body:
if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"): if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
examples.extend( examples.extend(
self._analyze_test_body( self._analyze_test_body(node, file_path, imports, setup_code=setup_code)
node, file_path, imports, setup_code=setup_code
)
) )
return examples return examples
@@ -269,9 +261,7 @@ class PythonTestAnalyzer:
# Check for fixture parameters # Check for fixture parameters
fixture_setup = self._extract_fixtures(func_node) fixture_setup = self._extract_fixtures(func_node)
return self._analyze_test_body( return self._analyze_test_body(func_node, file_path, imports, setup_code=fixture_setup)
func_node, file_path, imports, setup_code=fixture_setup
)
def _extract_setup_method(self, class_node: ast.ClassDef) -> str | None: def _extract_setup_method(self, class_node: ast.ClassDef) -> str | None:
"""Extract setUp method code""" """Extract setUp method code"""
@@ -328,9 +318,7 @@ class PythonTestAnalyzer:
examples.extend(configs) examples.extend(configs)
# 4. Multi-step workflows (integration tests) # 4. Multi-step workflows (integration tests)
workflows = self._find_workflows( workflows = self._find_workflows(func_node, file_path, docstring, setup_code, tags, imports)
func_node, file_path, docstring, setup_code, tags, imports
)
examples.extend(workflows) examples.extend(workflows)
return examples return examples
@@ -491,9 +479,7 @@ class PythonTestAnalyzer:
code=code, code=code,
language="Python", language="Python",
description=f"Configuration example: {description}", description=f"Configuration example: {description}",
expected_behavior=self._extract_assertion_after( expected_behavior=self._extract_assertion_after(func_node, node),
func_node, node
),
setup_code=setup_code, setup_code=setup_code,
file_path=file_path, file_path=file_path,
line_start=node.lineno, line_start=node.lineno,
@@ -594,9 +580,7 @@ class PythonTestAnalyzer:
integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"] integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"]
return any(keyword in test_name for keyword in integration_keywords) return any(keyword in test_name for keyword in integration_keywords)
def _extract_assertion_after( def _extract_assertion_after(self, func_node: ast.FunctionDef, target_node: ast.AST) -> str:
self, func_node: ast.FunctionDef, target_node: ast.AST
) -> str:
"""Find assertion that follows the target node""" """Find assertion that follows the target node"""
found_target = False found_target = False
for stmt in func_node.body: for stmt in func_node.body:
@@ -727,8 +711,7 @@ class GenericTestAnalyzer:
code=config_match.group(0), code=config_match.group(0),
language=language, language=language,
file_path=file_path, file_path=file_path,
line_number=code[: start_pos + config_match.start()].count("\n") line_number=code[: start_pos + config_match.start()].count("\n") + 1,
+ 1,
) )
examples.append(example) examples.append(example)
@@ -871,9 +854,7 @@ class TestExampleExtractor:
logger.warning(f"⚠️ Failed to initialize AI enhancer: {e}") logger.warning(f"⚠️ Failed to initialize AI enhancer: {e}")
self.enhance_with_ai = False self.enhance_with_ai = False
def extract_from_directory( def extract_from_directory(self, directory: Path, recursive: bool = True) -> ExampleReport:
self, directory: Path, recursive: bool = True
) -> ExampleReport:
"""Extract examples from all test files in directory""" """Extract examples from all test files in directory"""
directory = Path(directory) directory = Path(directory)
@@ -927,13 +908,11 @@ class TestExampleExtractor:
# Limit per file # Limit per file
if len(filtered_examples) > self.max_per_file: if len(filtered_examples) > self.max_per_file:
# Sort by confidence and take top N # Sort by confidence and take top N
filtered_examples = sorted( filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[
filtered_examples, key=lambda x: x.confidence, reverse=True : self.max_per_file
)[: self.max_per_file] ]
logger.info( logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}")
f"Extracted {len(filtered_examples)} examples from {file_path.name}"
)
return filtered_examples return filtered_examples
@@ -988,9 +967,7 @@ class TestExampleExtractor:
# Calculate averages # Calculate averages
avg_complexity = ( avg_complexity = (
sum(ex.complexity_score for ex in examples) / len(examples) sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0
if examples
else 0.0
) )
high_value_count = sum(1 for ex in examples if ex.confidence > 0.7) high_value_count = sum(1 for ex in examples if ex.confidence > 0.7)
@@ -1050,9 +1027,7 @@ Examples:
help="Maximum examples per file (default: 10)", help="Maximum examples per file (default: 10)",
) )
parser.add_argument("--json", action="store_true", help="Output JSON format") parser.add_argument("--json", action="store_true", help="Output JSON format")
parser.add_argument( parser.add_argument("--markdown", action="store_true", help="Output Markdown format")
"--markdown", action="store_true", help="Output Markdown format"
)
parser.add_argument( parser.add_argument(
"--recursive", "--recursive",
action="store_true", action="store_true",
@@ -1079,9 +1054,7 @@ Examples:
examples = extractor.extract_from_file(Path(args.file)) examples = extractor.extract_from_file(Path(args.file))
report = extractor._create_report(examples, file_path=args.file) report = extractor._create_report(examples, file_path=args.file)
else: else:
report = extractor.extract_from_directory( report = extractor.extract_from_directory(Path(args.directory), recursive=args.recursive)
Path(args.directory), recursive=args.recursive
)
# Output results # Output results
if args.json: if args.json:

View File

@@ -124,9 +124,7 @@ class UnifiedCodebaseAnalyzer:
AnalysisResult with all 3 streams AnalysisResult with all 3 streams
""" """
# Use three-stream fetcher # Use three-stream fetcher
fetcher = GitHubThreeStreamFetcher( fetcher = GitHubThreeStreamFetcher(repo_url, self.github_token, interactive=interactive)
repo_url, self.github_token, interactive=interactive
)
three_streams = fetcher.fetch(output_dir) three_streams = fetcher.fetch(output_dir)
# Analyze code with specified depth # Analyze code with specified depth
@@ -245,9 +243,7 @@ class UnifiedCodebaseAnalyzer:
basic = self.basic_analysis(directory) basic = self.basic_analysis(directory)
# Run full C3.x analysis using existing codebase_scraper # Run full C3.x analysis using existing codebase_scraper
print( print("🔍 Running C3.x components (patterns, examples, guides, configs, architecture)...")
"🔍 Running C3.x components (patterns, examples, guides, configs, architecture)..."
)
try: try:
# Import codebase analyzer # Import codebase analyzer
@@ -282,19 +278,11 @@ class UnifiedCodebaseAnalyzer:
c3x = {**basic, "analysis_type": "c3x", **c3x_data} c3x = {**basic, "analysis_type": "c3x", **c3x_data}
print("✅ C3.x analysis complete!") print("✅ C3.x analysis complete!")
print( print(f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected")
f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected" print(f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted")
) print(f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated")
print(
f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted"
)
print(
f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated"
)
print(f" - {len(c3x_data.get('c3_4_configs', []))} config files analyzed") print(f" - {len(c3x_data.get('c3_4_configs', []))} config files analyzed")
print( print(f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found")
f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found"
)
return c3x return c3x
@@ -451,9 +439,7 @@ class UnifiedCodebaseAnalyzer:
if item.is_dir(): if item.is_dir():
# Only include immediate subdirectories # Only include immediate subdirectories
structure["children"].append( structure["children"].append({"name": item.name, "type": "directory"})
{"name": item.name, "type": "directory"}
)
elif item.is_file(): elif item.is_file():
structure["children"].append( structure["children"].append(
{"name": item.name, "type": "file", "extension": item.suffix} {"name": item.name, "type": "file", "extension": item.suffix}

View File

@@ -203,15 +203,11 @@ How to use async tools.
], ],
} }
def test_scenario_1_github_three_stream_fetcher( def test_scenario_1_github_three_stream_fetcher(self, mock_github_repo, mock_github_api_data):
self, mock_github_repo, mock_github_api_data
):
"""Test GitHub three-stream fetcher with mock data.""" """Test GitHub three-stream fetcher with mock data."""
# Create fetcher with mock # Create fetcher with mock
with ( with (
patch.object( patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo
),
patch.object( patch.object(
GitHubThreeStreamFetcher, GitHubThreeStreamFetcher,
"fetch_github_metadata", "fetch_github_metadata",
@@ -251,14 +247,10 @@ How to use async tools.
assert len(three_streams.insights_stream.known_solutions) >= 1 assert len(three_streams.insights_stream.known_solutions) >= 1
assert len(three_streams.insights_stream.top_labels) >= 2 assert len(three_streams.insights_stream.top_labels) >= 2
def test_scenario_1_unified_analyzer_github( def test_scenario_1_unified_analyzer_github(self, mock_github_repo, mock_github_api_data):
self, mock_github_repo, mock_github_api_data
):
"""Test unified analyzer with GitHub source.""" """Test unified analyzer with GitHub source."""
with ( with (
patch.object( patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo),
GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo
),
patch.object( patch.object(
GitHubThreeStreamFetcher, GitHubThreeStreamFetcher,
"fetch_github_metadata", "fetch_github_metadata",
@@ -286,9 +278,7 @@ How to use async tools.
{"name": "test_azure_provider", "file": "test_auth.py"}, {"name": "test_azure_provider", "file": "test_auth.py"},
], ],
"c3_2_examples_count": 2, "c3_2_examples_count": 2,
"c3_3_guides": [ "c3_3_guides": [{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}],
{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}
],
"c3_4_configs": [], "c3_4_configs": [],
"c3_7_architecture": [ "c3_7_architecture": [
{ {
@@ -335,9 +325,7 @@ How to use async tools.
{ {
"name": "fastmcp-oauth", "name": "fastmcp-oauth",
"description": "OAuth authentication for FastMCP", "description": "OAuth authentication for FastMCP",
"categories": { "categories": {"oauth": ["oauth", "auth", "provider", "google", "azure"]},
"oauth": ["oauth", "auth", "provider", "google", "azure"]
},
} }
) )
) )
@@ -491,9 +479,7 @@ pip install fastmcp
# Check content quality (Architecture Section 8.2) # Check content quality (Architecture Section 8.2)
assert "Issue #42" in router_md, "Missing issue references" assert "Issue #42" in router_md, "Missing issue references"
assert "" in router_md or "Stars:" in router_md, "Missing GitHub metadata" assert "" in router_md or "Stars:" in router_md, "Missing GitHub metadata"
assert ( assert "Quick Start" in router_md or "README" in router_md, "Missing README content"
"Quick Start" in router_md or "README" in router_md
), "Missing README content"
class TestScenario2MultiSource: class TestScenario2MultiSource:
@@ -617,15 +603,11 @@ class TestScenario2MultiSource:
# Layer 4: GitHub insights (community knowledge) # Layer 4: GitHub insights (community knowledge)
# Mock source 1 (HTML docs) # Mock source 1 (HTML docs)
source1_data = { source1_data = {"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]}
"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]
}
# Mock source 2 (GitHub C3.x) # Mock source 2 (GitHub C3.x)
source2_data = { source2_data = {
"api": [ "api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}]
{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}
]
} }
# Mock GitHub streams # Mock GitHub streams
@@ -651,9 +633,7 @@ class TestScenario2MultiSource:
) )
# Create merger with required arguments # Create merger with required arguments
merger = RuleBasedMerger( merger = RuleBasedMerger(docs_data=source1_data, github_data=source2_data, conflicts=[])
docs_data=source1_data, github_data=source2_data, conflicts=[]
)
# Merge using merge_all() method # Merge using merge_all() method
merged = merger.merge_all() merged = merger.merge_all()
@@ -770,12 +750,8 @@ def test_connection():
mock_c3x.return_value = { mock_c3x.return_value = {
"files": ["database.py", "api.py"], "files": ["database.py", "api.py"],
"analysis_type": "c3x", "analysis_type": "c3x",
"c3_1_patterns": [ "c3_1_patterns": [{"name": "Singleton", "count": 1, "file": "database.py"}],
{"name": "Singleton", "count": 1, "file": "database.py"} "c3_2_examples": [{"name": "test_connection", "file": "test_database.py"}],
],
"c3_2_examples": [
{"name": "test_connection", "file": "test_database.py"}
],
"c3_2_examples_count": 1, "c3_2_examples_count": 1,
"c3_3_guides": [], "c3_3_guides": [],
"c3_4_configs": [], "c3_4_configs": [],
@@ -967,9 +943,7 @@ Based on analysis of GitHub issues:
print(f"\nGitHub overhead: {github_overhead} lines") print(f"\nGitHub overhead: {github_overhead} lines")
# Architecture target: 20-60 lines # Architecture target: 20-60 lines
assert ( assert 20 <= github_overhead <= 60, f"GitHub overhead {github_overhead} not in range 20-60"
20 <= github_overhead <= 60
), f"GitHub overhead {github_overhead} not in range 20-60"
def test_router_size_within_limits(self): def test_router_size_within_limits(self):
"""Test router size is 150±20 lines (Architecture Section 8.1, Line 1970).""" """Test router size is 150±20 lines (Architecture Section 8.1, Line 1970)."""
@@ -977,9 +951,7 @@ Based on analysis of GitHub issues:
router_lines = 150 # Simulated count router_lines = 150 # Simulated count
# Architecture target: 150 lines (±20) # Architecture target: 150 lines (±20)
assert ( assert 130 <= router_lines <= 170, f"Router size {router_lines} not in range 130-170"
130 <= router_lines <= 170
), f"Router size {router_lines} not in range 130-170"
def test_content_quality_requirements(self): def test_content_quality_requirements(self):
"""Test content quality (Architecture Section 8.2, Lines 1977-2014).""" """Test content quality (Architecture Section 8.2, Lines 1977-2014)."""
@@ -1021,9 +993,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
# Check minimum 3 code examples # Check minimum 3 code examples
code_blocks = sub_skill_md.count("```") code_blocks = sub_skill_md.count("```")
assert ( assert code_blocks >= 6, (
code_blocks >= 6 f"Need at least 3 code examples (6 markers), found {code_blocks // 2}"
), f"Need at least 3 code examples (6 markers), found {code_blocks // 2}" )
# Check language tags # Check language tags
assert "```python" in sub_skill_md, "Code blocks must have language tags" assert "```python" in sub_skill_md, "Code blocks must have language tags"
@@ -1038,9 +1010,9 @@ provider = GitHubProvider(client_id="...", client_secret="...")
# Check solution indicators for closed issues # Check solution indicators for closed issues
if "closed" in sub_skill_md.lower(): if "closed" in sub_skill_md.lower():
assert ( assert "" in sub_skill_md or "Solution" in sub_skill_md, (
"" in sub_skill_md or "Solution" in sub_skill_md "Closed issues should indicate solution found"
), "Closed issues should indicate solution found" )
class TestTokenEfficiencyCalculation: class TestTokenEfficiencyCalculation:
@@ -1077,9 +1049,9 @@ class TestTokenEfficiencyCalculation:
# With selective loading and caching, achieve 35-40% # With selective loading and caching, achieve 35-40%
# Even conservative estimate shows 29.5%, actual usage patterns show 35-40% # Even conservative estimate shows 29.5%, actual usage patterns show 35-40%
assert ( assert reduction_percent >= 29, (
reduction_percent >= 29 f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)"
), f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)" )
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -103,9 +103,7 @@ class TestAsyncScrapeMethods(unittest.TestCase):
os.chdir(tmpdir) os.chdir(tmpdir)
converter = DocToSkillConverter(config, dry_run=True) converter = DocToSkillConverter(config, dry_run=True)
self.assertTrue(hasattr(converter, "scrape_page_async")) self.assertTrue(hasattr(converter, "scrape_page_async"))
self.assertTrue( self.assertTrue(asyncio.iscoroutinefunction(converter.scrape_page_async))
asyncio.iscoroutinefunction(converter.scrape_page_async)
)
finally: finally:
os.chdir(self.original_cwd) os.chdir(self.original_cwd)
@@ -263,9 +261,7 @@ class TestAsyncErrorHandling(unittest.TestCase):
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
# Mock client.get to raise exception # Mock client.get to raise exception
with patch.object( with patch.object(client, "get", side_effect=httpx.HTTPError("Test error")):
client, "get", side_effect=httpx.HTTPError("Test error")
):
# Should not raise exception, just log error # Should not raise exception, just log error
await converter.scrape_page_async( await converter.scrape_page_async(
"https://example.com/test", semaphore, client "https://example.com/test", semaphore, client

View File

@@ -134,9 +134,7 @@ class TestREADMEExtraction(unittest.TestCase):
scraper._extract_readme() scraper._extract_readme()
self.assertIn("readme", scraper.extracted_data) self.assertIn("readme", scraper.extracted_data)
self.assertEqual( self.assertEqual(scraper.extracted_data["readme"], "# React\n\nA JavaScript library")
scraper.extracted_data["readme"], "# React\n\nA JavaScript library"
)
def test_extract_readme_tries_multiple_locations(self): def test_extract_readme_tries_multiple_locations(self):
"""Test that README extraction tries multiple file locations""" """Test that README extraction tries multiple file locations"""
@@ -477,15 +475,9 @@ class TestReleasesExtraction(unittest.TestCase):
mock_release1.prerelease = False mock_release1.prerelease = False
mock_release1.created_at = datetime(2023, 3, 1) mock_release1.created_at = datetime(2023, 3, 1)
mock_release1.published_at = datetime(2023, 3, 1) mock_release1.published_at = datetime(2023, 3, 1)
mock_release1.html_url = ( mock_release1.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0"
"https://github.com/facebook/react/releases/tag/v18.0.0" mock_release1.tarball_url = "https://github.com/facebook/react/archive/v18.0.0.tar.gz"
) mock_release1.zipball_url = "https://github.com/facebook/react/archive/v18.0.0.zip"
mock_release1.tarball_url = (
"https://github.com/facebook/react/archive/v18.0.0.tar.gz"
)
mock_release1.zipball_url = (
"https://github.com/facebook/react/archive/v18.0.0.zip"
)
mock_release2 = Mock() mock_release2 = Mock()
mock_release2.tag_name = "v18.0.0-rc.0" mock_release2.tag_name = "v18.0.0-rc.0"
@@ -495,15 +487,9 @@ class TestReleasesExtraction(unittest.TestCase):
mock_release2.prerelease = True mock_release2.prerelease = True
mock_release2.created_at = datetime(2023, 2, 1) mock_release2.created_at = datetime(2023, 2, 1)
mock_release2.published_at = datetime(2023, 2, 1) mock_release2.published_at = datetime(2023, 2, 1)
mock_release2.html_url = ( mock_release2.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0-rc.0"
"https://github.com/facebook/react/releases/tag/v18.0.0-rc.0" mock_release2.tarball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz"
) mock_release2.zipball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.zip"
mock_release2.tarball_url = (
"https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz"
)
mock_release2.zipball_url = (
"https://github.com/facebook/react/archive/v18.0.0-rc.0.zip"
)
with patch("skill_seekers.cli.github_scraper.Github"): with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config) scraper = self.GitHubScraper(config)
@@ -612,9 +598,7 @@ class TestGitHubToSkillConverter(unittest.TestCase):
config = {"repo": "facebook/react", "name": "test", "description": "Test skill"} config = {"repo": "facebook/react", "name": "test", "description": "Test skill"}
# Override data file path # Override data file path
with patch( with patch("skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__") as mock_init:
"skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__"
) as mock_init:
mock_init.return_value = None mock_init.return_value = None
converter = self.GitHubToSkillConverter(config) converter = self.GitHubToSkillConverter(config)
converter.data_file = str(self.data_file) converter.data_file = str(self.data_file)
@@ -1000,9 +984,7 @@ class TestErrorHandling(unittest.TestCase):
with patch("skill_seekers.cli.github_scraper.Github"): with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config) scraper = self.GitHubScraper(config)
scraper.repo = None scraper.repo = None
scraper.github.get_repo = Mock( scraper.github.get_repo = Mock(side_effect=GithubException(404, "Not found"))
side_effect=GithubException(404, "Not found")
)
# Should raise ValueError with helpful message # Should raise ValueError with helpful message
with self.assertRaises(ValueError) as context: with self.assertRaises(ValueError) as context:
@@ -1022,9 +1004,7 @@ class TestErrorHandling(unittest.TestCase):
with patch("skill_seekers.cli.github_scraper.Github"): with patch("skill_seekers.cli.github_scraper.Github"):
scraper = self.GitHubScraper(config) scraper = self.GitHubScraper(config)
scraper.repo = Mock() scraper.repo = Mock()
scraper.repo.get_issues.side_effect = GithubException( scraper.repo.get_issues.side_effect = GithubException(403, "Rate limit exceeded")
403, "Rate limit exceeded"
)
# Should handle gracefully and log warning # Should handle gracefully and log warning
scraper._extract_issues() scraper._extract_issues()

View File

@@ -31,9 +31,7 @@ class TestGuideEnhancerModeDetection:
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch( patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
): ):
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="auto") enhancer = GuideEnhancer(mode="auto")
@@ -111,9 +109,7 @@ class TestGuideEnhancerStepDescriptions:
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch( patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
): ):
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
@@ -179,9 +175,7 @@ class TestGuideEnhancerTroubleshooting:
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch( patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
): ):
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
@@ -192,9 +186,7 @@ class TestGuideEnhancerTroubleshooting:
guide_data = { guide_data = {
"title": "Test Guide", "title": "Test Guide",
"steps": [ "steps": [{"description": "import requests", "code": "import requests"}],
{"description": "import requests", "code": "import requests"}
],
"language": "python", "language": "python",
} }
result = enhancer.enhance_troubleshooting(guide_data) result = enhancer.enhance_troubleshooting(guide_data)
@@ -246,9 +238,7 @@ class TestGuideEnhancerPrerequisites:
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch( patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
): ):
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
@@ -293,9 +283,7 @@ class TestGuideEnhancerNextSteps:
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch( patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
): ):
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
@@ -340,9 +328,7 @@ class TestGuideEnhancerUseCases:
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch( patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
): ):
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
@@ -426,9 +412,7 @@ class TestGuideEnhancerFullWorkflow:
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}),
patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True),
patch( patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic,
"skill_seekers.cli.guide_enhancer.anthropic", create=True
) as mock_anthropic,
): ):
mock_anthropic.Anthropic = Mock() mock_anthropic.Anthropic = Mock()
enhancer = GuideEnhancer(mode="api") enhancer = GuideEnhancer(mode="api")
@@ -461,9 +445,7 @@ class TestGuideEnhancerFullWorkflow:
"""Test graceful fallback on enhancement error""" """Test graceful fallback on enhancement error"""
enhancer = GuideEnhancer(mode="none") enhancer = GuideEnhancer(mode="none")
with patch.object( with patch.object(enhancer, "enhance_guide", side_effect=Exception("API error")):
enhancer, "enhance_guide", side_effect=Exception("API error")
):
guide_data = { guide_data = {
"title": "Test", "title": "Test",
"steps": [], "steps": [],
@@ -533,9 +515,7 @@ class TestGuideEnhancerPromptGeneration:
guide_data = { guide_data = {
"title": "How to Test", "title": "How to Test",
"steps": [ "steps": [{"description": "Write test", "code": "def test_example(): pass"}],
{"description": "Write test", "code": "def test_example(): pass"}
],
"language": "python", "language": "python",
"prerequisites": ["pytest"], "prerequisites": ["pytest"],
} }
@@ -583,9 +563,7 @@ class TestGuideEnhancerResponseParsing:
response = json.dumps( response = json.dumps(
{ {
"step_descriptions": [ "step_descriptions": [{"step_index": 0, "explanation": "Test", "variations": []}],
{"step_index": 0, "explanation": "Test", "variations": []}
],
"troubleshooting": [], "troubleshooting": [],
"prerequisites_detailed": [], "prerequisites_detailed": [],
"next_steps": [], "next_steps": [],

View File

@@ -174,9 +174,7 @@ class TestInstallToAgent:
self.skill_dir.mkdir() self.skill_dir.mkdir()
# Create SKILL.md # Create SKILL.md
(self.skill_dir / "SKILL.md").write_text( (self.skill_dir / "SKILL.md").write_text("# Test Skill\n\nThis is a test skill.")
"# Test Skill\n\nThis is a test skill."
)
# Create references directory with files # Create references directory with files
refs_dir = self.skill_dir / "references" refs_dir = self.skill_dir / "references"
@@ -201,9 +199,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path", "skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path, return_value=agent_path,
): ):
success, message = install_to_agent( success, message = install_to_agent(self.skill_dir, "claude", force=True)
self.skill_dir, "claude", force=True
)
assert success is True assert success is True
target_path = agent_path / "test-skill" target_path = agent_path / "test-skill"
@@ -219,9 +215,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path", "skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path, return_value=agent_path,
): ):
success, message = install_to_agent( success, message = install_to_agent(self.skill_dir, "claude", force=True)
self.skill_dir, "claude", force=True
)
assert success is True assert success is True
target_path = agent_path / "test-skill" target_path = agent_path / "test-skill"
@@ -246,9 +240,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path", "skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path, return_value=agent_path,
): ):
success, message = install_to_agent( success, message = install_to_agent(self.skill_dir, "claude", force=True)
self.skill_dir, "claude", force=True
)
assert success is True assert success is True
target_path = agent_path / "test-skill" target_path = agent_path / "test-skill"
@@ -269,9 +261,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path", "skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path, return_value=agent_path,
): ):
success, message = install_to_agent( success, message = install_to_agent(self.skill_dir, "claude", force=False)
self.skill_dir, "claude", force=False
)
assert success is False assert success is False
assert "already installed" in message.lower() assert "already installed" in message.lower()
@@ -289,9 +279,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path", "skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path, return_value=agent_path,
): ):
success, message = install_to_agent( success, message = install_to_agent(self.skill_dir, "claude", force=True)
self.skill_dir, "claude", force=True
)
assert success is True assert success is True
# Old file should be gone # Old file should be gone
@@ -328,9 +316,7 @@ class TestInstallToAgent:
"skill_seekers.cli.install_agent.get_agent_path", "skill_seekers.cli.install_agent.get_agent_path",
return_value=agent_path, return_value=agent_path,
): ):
success, message = install_to_agent( success, message = install_to_agent(self.skill_dir, "claude", dry_run=True)
self.skill_dir, "claude", dry_run=True
)
assert success is True assert success is True
assert "DRY RUN" in message assert "DRY RUN" in message
@@ -485,9 +471,7 @@ class TestInstallAgentCLI:
assert exit_code == 0 assert exit_code == 0
# Directory should NOT be created # Directory should NOT be created
assert not ( assert not (Path(agent_tmpdir) / ".claude" / "skills" / "test-skill").exists()
Path(agent_tmpdir) / ".claude" / "skills" / "test-skill"
).exists()
def test_cli_integration(self): def test_cli_integration(self):
"""Test end-to-end CLI execution.""" """Test end-to-end CLI execution."""

View File

@@ -50,9 +50,7 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase):
# Mock large CHANGELOG (1.4MB, encoding="none") # Mock large CHANGELOG (1.4MB, encoding="none")
mock_content = Mock() mock_content = Mock()
mock_content.type = "file" mock_content.type = "file"
mock_content.encoding = ( mock_content.encoding = "none" # This is what GitHub API returns for large files
"none" # This is what GitHub API returns for large files
)
mock_content.size = 1388271 mock_content.size = 1388271
mock_content.download_url = ( mock_content.download_url = (
"https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md" "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md"
@@ -82,9 +80,7 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase):
# VERIFY: CHANGELOG was extracted successfully # VERIFY: CHANGELOG was extracted successfully
self.assertIn("changelog", scraper.extracted_data) self.assertIn("changelog", scraper.extracted_data)
self.assertIn("Bug fixes", scraper.extracted_data["changelog"]) self.assertIn("Bug fixes", scraper.extracted_data["changelog"])
self.assertEqual( self.assertEqual(scraper.extracted_data["changelog"], mock_response.text)
scraper.extracted_data["changelog"], mock_response.text
)
def test_large_file_fallback_on_error(self): def test_large_file_fallback_on_error(self):
"""E2E: Verify graceful handling if download_url fails""" """E2E: Verify graceful handling if download_url fails"""
@@ -184,8 +180,7 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase):
# VERIFY: sys.argv contains --enhance-local flag # VERIFY: sys.argv contains --enhance-local flag
# (main.py should have added it before calling github_scraper) # (main.py should have added it before calling github_scraper)
called_with_enhance = any( called_with_enhance = any(
"--enhance-local" in str(call) "--enhance-local" in str(call) for call in mock_github_main.call_args_list
for call in mock_github_main.call_args_list
) )
self.assertTrue( self.assertTrue(
called_with_enhance or "--enhance-local" in sys.argv, called_with_enhance or "--enhance-local" in sys.argv,
@@ -229,9 +224,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
os.environ, os.environ,
{"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url}, {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url},
), ),
patch( patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
) as mock_anthropic,
): ):
# Create enhancer # Create enhancer
_enhancer = SkillEnhancer(self.skill_dir) _enhancer = SkillEnhancer(self.skill_dir)
@@ -258,9 +251,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
# Use ANTHROPIC_AUTH_TOKEN instead of ANTHROPIC_API_KEY # Use ANTHROPIC_AUTH_TOKEN instead of ANTHROPIC_API_KEY
with ( with (
patch.dict(os.environ, {"ANTHROPIC_AUTH_TOKEN": custom_token}, clear=True), patch.dict(os.environ, {"ANTHROPIC_AUTH_TOKEN": custom_token}, clear=True),
patch( patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
) as mock_anthropic,
): ):
# Create enhancer (should accept ANTHROPIC_AUTH_TOKEN) # Create enhancer (should accept ANTHROPIC_AUTH_TOKEN)
enhancer = SkillEnhancer(self.skill_dir) enhancer = SkillEnhancer(self.skill_dir)
@@ -290,9 +281,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
with ( with (
patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}), patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}),
patch( patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic,
"skill_seekers.cli.enhance_skill.anthropic.Anthropic"
) as mock_anthropic,
): ):
enhancer = SkillEnhancer(self.skill_dir) enhancer = SkillEnhancer(self.skill_dir)
@@ -301,9 +290,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase):
mock_thinking_block = SimpleNamespace(type="thinking") mock_thinking_block = SimpleNamespace(type="thinking")
# TextBlock has .text attribute # TextBlock has .text attribute
mock_text_block = SimpleNamespace( mock_text_block = SimpleNamespace(text="# Enhanced SKILL.md\n\nContent here")
text="# Enhanced SKILL.md\n\nContent here"
)
mock_message = Mock() mock_message = Mock()
mock_message.content = [mock_thinking_block, mock_text_block] mock_message.content = [mock_thinking_block, mock_text_block]

View File

@@ -31,9 +31,7 @@ def test_timeout_with_retry():
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2) downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2)
with ( with (
patch( patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get,
"requests.get", side_effect=requests.Timeout("Connection timeout")
) as mock_get,
patch("time.sleep") as mock_sleep, patch("time.sleep") as mock_sleep,
): # Mock sleep to speed up test ): # Mock sleep to speed up test
content = downloader.download() content = downloader.download()
@@ -143,9 +141,7 @@ def test_custom_max_retries():
downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5) downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5)
with ( with (
patch( patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get,
"requests.get", side_effect=requests.Timeout("Connection timeout")
) as mock_get,
patch("time.sleep"), patch("time.sleep"),
): ):
content = downloader.download() content = downloader.download()
@@ -203,7 +199,9 @@ def test_is_markdown_rejects_html_doctype():
"""Test that HTML with DOCTYPE is rejected (prevents redirect trap)""" """Test that HTML with DOCTYPE is rejected (prevents redirect trap)"""
downloader = LlmsTxtDownloader("https://example.com/llms.txt") downloader = LlmsTxtDownloader("https://example.com/llms.txt")
html = "<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>" html = (
"<!DOCTYPE html><html><head><title>Product Page</title></head><body>Content</body></html>"
)
assert not downloader._is_markdown(html) assert not downloader._is_markdown(html)
# Test case-insensitive # Test case-insensitive
@@ -230,9 +228,7 @@ def test_is_markdown_rejects_html_meta():
html_with_head = "<head><title>Page</title></head><body>Content</body>" html_with_head = "<head><title>Page</title></head><body>Content</body>"
assert not downloader._is_markdown(html_with_head) assert not downloader._is_markdown(html_with_head)
html_with_meta = ( html_with_meta = '<meta charset="utf-8"><meta name="viewport" content="width=device-width">'
'<meta charset="utf-8"><meta name="viewport" content="width=device-width">'
)
assert not downloader._is_markdown(html_with_meta) assert not downloader._is_markdown(html_with_meta)
@@ -244,9 +240,7 @@ def test_is_markdown_accepts_markdown_with_html_words():
assert downloader._is_markdown(markdown) assert downloader._is_markdown(markdown)
# Test with actual markdown patterns # Test with actual markdown patterns
markdown_with_code = ( markdown_with_code = "# HTML Tutorial\n\n```html\n<div>example</div>\n```\n\n## More content"
"# HTML Tutorial\n\n```html\n<div>example</div>\n```\n\n## More content"
)
assert downloader._is_markdown(markdown_with_code) assert downloader._is_markdown(markdown_with_code)
@@ -255,9 +249,7 @@ def test_html_detection_only_scans_first_500_chars():
downloader = LlmsTxtDownloader("https://example.com/llms.txt") downloader = LlmsTxtDownloader("https://example.com/llms.txt")
# HTML tag after 500 chars should not be detected # HTML tag after 500 chars should not be detected
safe_markdown = ( safe_markdown = "# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n<!DOCTYPE html>"
"# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n<!DOCTYPE html>"
)
# This should pass because <!DOCTYPE html> is beyond first 500 chars # This should pass because <!DOCTYPE html> is beyond first 500 chars
if len(safe_markdown[:500]) < len("<!DOCTYPE html>"): if len(safe_markdown[:500]) < len("<!DOCTYPE html>"):
# If the HTML is within 500 chars, adjust test # If the HTML is within 500 chars, adjust test
@@ -294,9 +286,7 @@ def test_download_rejects_html_redirect():
mock_response = Mock() mock_response = Mock()
# Simulate server returning HTML instead of markdown # Simulate server returning HTML instead of markdown
mock_response.text = ( mock_response.text = "<!DOCTYPE html><html><body><h1>Product Page</h1></body></html>"
"<!DOCTYPE html><html><body><h1>Product Page</h1></body></html>"
)
mock_response.raise_for_status = Mock() mock_response.raise_for_status = Mock()
with patch("requests.get", return_value=mock_response): with patch("requests.get", return_value=mock_response):

View File

@@ -73,9 +73,7 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase):
converter = DocToSkillConverter(config, dry_run=False) converter = DocToSkillConverter(config, dry_run=False)
with ( with (
patch.object( patch.object(converter, "_try_llms_txt", return_value=False) as mock_try,
converter, "_try_llms_txt", return_value=False
) as mock_try,
patch.object(converter, "scrape_page"), patch.object(converter, "scrape_page"),
patch.object(converter, "save_summary"), patch.object(converter, "save_summary"),
): ):
@@ -154,9 +152,7 @@ class TestSkipLlmsTxtAsyncBehavior(unittest.TestCase):
converter = DocToSkillConverter(config, dry_run=False) converter = DocToSkillConverter(config, dry_run=False)
with ( with (
patch.object( patch.object(converter, "_try_llms_txt", return_value=False) as mock_try,
converter, "_try_llms_txt", return_value=False
) as mock_try,
patch.object(converter, "scrape_page_async", return_value=None), patch.object(converter, "scrape_page_async", return_value=None),
patch.object(converter, "save_summary"), patch.object(converter, "save_summary"),
): ):
@@ -252,9 +248,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
converter = DocToSkillConverter(config, dry_run=True) converter = DocToSkillConverter(config, dry_run=True)
self.assertFalse(converter.skip_llms_txt) self.assertFalse(converter.skip_llms_txt)
self.assertTrue( self.assertTrue(any("Invalid value" in log and "0" in log for log in cm.output))
any("Invalid value" in log and "0" in log for log in cm.output)
)
def test_skip_llms_txt_with_int_one_logs_warning(self): def test_skip_llms_txt_with_int_one_logs_warning(self):
"""Test that integer 1 logs warning and defaults to False.""" """Test that integer 1 logs warning and defaults to False."""
@@ -268,9 +262,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
converter = DocToSkillConverter(config, dry_run=True) converter = DocToSkillConverter(config, dry_run=True)
self.assertFalse(converter.skip_llms_txt) self.assertFalse(converter.skip_llms_txt)
self.assertTrue( self.assertTrue(any("Invalid value" in log and "1" in log for log in cm.output))
any("Invalid value" in log and "1" in log for log in cm.output)
)
def test_skip_llms_txt_with_string_logs_warning(self): def test_skip_llms_txt_with_string_logs_warning(self):
"""Test that string values log warning and default to False.""" """Test that string values log warning and default to False."""
@@ -284,9 +276,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
converter = DocToSkillConverter(config, dry_run=True) converter = DocToSkillConverter(config, dry_run=True)
self.assertFalse(converter.skip_llms_txt) self.assertFalse(converter.skip_llms_txt)
self.assertTrue( self.assertTrue(any("Invalid value" in log and "true" in log for log in cm.output))
any("Invalid value" in log and "true" in log for log in cm.output)
)
def test_skip_llms_txt_with_none_logs_warning(self): def test_skip_llms_txt_with_none_logs_warning(self):
"""Test that None logs warning and defaults to False.""" """Test that None logs warning and defaults to False."""
@@ -300,9 +290,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase):
with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm:
converter = DocToSkillConverter(config, dry_run=True) converter = DocToSkillConverter(config, dry_run=True)
self.assertFalse(converter.skip_llms_txt) self.assertFalse(converter.skip_llms_txt)
self.assertTrue( self.assertTrue(any("Invalid value" in log and "None" in log for log in cm.output))
any("Invalid value" in log and "None" in log for log in cm.output)
)
def test_scraping_proceeds_when_llms_txt_skipped(self): def test_scraping_proceeds_when_llms_txt_skipped(self):
"""Test that HTML scraping proceeds normally when llms.txt is skipped.""" """Test that HTML scraping proceeds normally when llms.txt is skipped."""