From 9d43956b1dcbb3f91e94d0c5e72481ffc7491b45 Mon Sep 17 00:00:00 2001 From: yusyus Date: Sat, 17 Jan 2026 23:56:24 +0300 Subject: [PATCH] style: Run black formatter on 16 files Applied black formatting to files modified in linting fixes: Source files (8): - config_extractor.py - doc_scraper.py - how_to_guide_builder.py - llms_txt_downloader.py - llms_txt_parser.py - pattern_recognizer.py - test_example_extractor.py - unified_codebase_analyzer.py Test files (8): - test_architecture_scenarios.py - test_async_scraping.py - test_github_scraper.py - test_guide_enhancer.py - test_install_agent.py - test_issue_219_e2e.py - test_llms_txt_downloader.py - test_skip_llms_txt.py All formatting issues resolved. Co-Authored-By: Claude Sonnet 4.5 --- src/skill_seekers/cli/config_extractor.py | 78 +++++- src/skill_seekers/cli/doc_scraper.py | 259 +++++++++++++----- src/skill_seekers/cli/how_to_guide_builder.py | 85 ++++-- src/skill_seekers/cli/llms_txt_downloader.py | 1 - src/skill_seekers/cli/llms_txt_parser.py | 9 +- src/skill_seekers/cli/pattern_recognizer.py | 113 +++++--- .../cli/test_example_extractor.py | 88 ++++-- .../cli/unified_codebase_analyzer.py | 46 +++- tests/test_architecture_scenarios.py | 194 +++++++++---- tests/test_async_scraping.py | 22 +- tests/test_github_scraper.py | 101 +++++-- tests/test_guide_enhancer.py | 110 ++++++-- tests/test_install_agent.py | 133 ++++++--- tests/test_issue_219_e2e.py | 40 ++- tests/test_llms_txt_downloader.py | 41 ++- tests/test_skip_llms_txt.py | 59 +++- 16 files changed, 1044 insertions(+), 335 deletions(-) diff --git a/src/skill_seekers/cli/config_extractor.py b/src/skill_seekers/cli/config_extractor.py index 58bea85..133ee58 100644 --- a/src/skill_seekers/cli/config_extractor.py +++ b/src/skill_seekers/cli/config_extractor.py @@ -65,7 +65,15 @@ class ConfigFile: file_path: str relative_path: str config_type: Literal[ - "json", "yaml", "toml", "env", "ini", "python", "javascript", "dockerfile", "docker-compose" + "json", + "yaml", + "toml", + "env", + "ini", + "python", + "javascript", + "dockerfile", + "docker-compose", ] purpose: str # Inferred purpose: database, api, logging, etc. settings: list[ConfigSetting] = field(default_factory=list) @@ -81,7 +89,9 @@ class ConfigExtractionResult: config_files: list[ConfigFile] = field(default_factory=list) total_files: int = 0 total_settings: int = 0 - detected_patterns: dict[str, list[str]] = field(default_factory=dict) # pattern -> files + detected_patterns: dict[str, list[str]] = field( + default_factory=dict + ) # pattern -> files errors: list[str] = field(default_factory=list) def to_dict(self) -> dict: @@ -195,7 +205,12 @@ class ConfigFileDetector: }, "javascript": { "patterns": ["*.config.js", "*.config.ts"], - "names": ["config.js", "next.config.js", "vue.config.js", "webpack.config.js"], + "names": [ + "config.js", + "next.config.js", + "vue.config.js", + "webpack.config.js", + ], }, "dockerfile": { "patterns": ["Dockerfile*"], @@ -226,7 +241,9 @@ class ConfigFileDetector: "*.egg-info", } - def find_config_files(self, directory: Path, max_files: int = 100) -> list[ConfigFile]: + def find_config_files( + self, directory: Path, max_files: int = 100 + ) -> list[ConfigFile]: """ Find all configuration files in directory. @@ -297,7 +314,10 @@ class ConfigFileDetector: filename = file_path.name.lower() # Database configs - if any(word in path_lower for word in ["database", "db", "postgres", "mysql", "mongo"]): + if any( + word in path_lower + for word in ["database", "db", "postgres", "mysql", "mongo"] + ): return "database_configuration" # API configs @@ -313,7 +333,9 @@ class ConfigFileDetector: return "docker_configuration" # CI/CD configs - if any(word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"]): + if any( + word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"] + ): return "ci_cd_configuration" # Package configs @@ -325,7 +347,11 @@ class ConfigFileDetector: return "typescript_configuration" # Framework configs - if "next.config" in filename or "vue.config" in filename or "webpack.config" in filename: + if ( + "next.config" in filename + or "vue.config" in filename + or "webpack.config" in filename + ): return "framework_configuration" # Environment configs @@ -467,7 +493,12 @@ class ConfigParser: for node in ast.walk(tree): # Get variable name and skip private variables - if isinstance(node, ast.Assign) and len(node.targets) == 1 and isinstance(node.targets[0], ast.Name) and not node.targets[0].id.startswith("_"): + if ( + isinstance(node, ast.Assign) + and len(node.targets) == 1 + and isinstance(node.targets[0], ast.Name) + and not node.targets[0].id.startswith("_") + ): key = node.targets[0].id # Extract value @@ -500,7 +531,9 @@ class ConfigParser: for match in re.finditer(pattern, config_file.raw_content): if len(match.groups()) >= 2: key = match.group(1) - value = match.group(3) if len(match.groups()) > 2 else match.group(2) + value = ( + match.group(3) if len(match.groups()) > 2 else match.group(2) + ) setting = ConfigSetting( key=key, value=value, value_type=self._infer_type(value) @@ -546,7 +579,9 @@ class ConfigParser: for key, value in data.items(): if isinstance(value, dict): # Recurse into nested dicts - self._extract_settings_from_dict(value, config_file, parent_path + [key]) + self._extract_settings_from_dict( + value, config_file, parent_path + [key] + ) else: setting = ConfigSetting( key=".".join(parent_path + [key]) if parent_path else key, @@ -593,11 +628,26 @@ class ConfigPatternDetector: # Known configuration patterns KNOWN_PATTERNS = { "database_config": { - "keys": ["host", "port", "database", "user", "username", "password", "db_name"], + "keys": [ + "host", + "port", + "database", + "user", + "username", + "password", + "db_name", + ], "min_match": 3, }, "api_config": { - "keys": ["base_url", "api_key", "api_secret", "timeout", "retry", "endpoint"], + "keys": [ + "base_url", + "api_key", + "api_secret", + "timeout", + "retry", + "endpoint", + ], "min_match": 2, }, "logging_config": { @@ -822,7 +872,9 @@ def main(): print("\nšŸ“Š Summary:") print(f" Config files found: {result.total_files}") print(f" Total settings: {result.total_settings}") - print(f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}") + print( + f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}" + ) if "ai_enhancements" in output_dict: print(f" ✨ AI enhancements: Yes ({enhance_mode} mode)") diff --git a/src/skill_seekers/cli/doc_scraper.py b/src/skill_seekers/cli/doc_scraper.py index 26540a1..2ef1d6c 100755 --- a/src/skill_seekers/cli/doc_scraper.py +++ b/src/skill_seekers/cli/doc_scraper.py @@ -148,7 +148,9 @@ def infer_description_from_docs( class DocToSkillConverter: - def __init__(self, config: dict[str, Any], dry_run: bool = False, resume: bool = False) -> None: + def __init__( + self, config: dict[str, Any], dry_run: bool = False, resume: bool = False + ) -> None: self.config = config self.name = config["name"] self.base_url = config["base_url"] @@ -163,7 +165,9 @@ class DocToSkillConverter: # Checkpoint config checkpoint_config = config.get("checkpoint", {}) self.checkpoint_enabled = checkpoint_config.get("enabled", False) - self.checkpoint_interval = checkpoint_config.get("interval", DEFAULT_CHECKPOINT_INTERVAL) + self.checkpoint_interval = checkpoint_config.get( + "interval", DEFAULT_CHECKPOINT_INTERVAL + ) # llms.txt detection state skip_llms_txt_value = config.get("skip_llms_txt", False) @@ -318,7 +322,9 @@ class DocToSkillConverter: for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]): text = self.clean_text(h.get_text()) if text: - page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")}) + page["headings"].append( + {"level": h.name, "text": text, "id": h.get("id", "")} + ) # Extract code with language detection code_selector = selectors.get("code_blocks", "pre code") @@ -385,7 +391,9 @@ class DocToSkillConverter: import re # Detect if content is actually HTML (some .md URLs return HTML) - if content.strip().startswith(" 10: - page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"}) + page["code_samples"].append( + {"code": code.strip(), "language": lang or "unknown"} + ) # Extract content (paragraphs) content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL) @@ -444,7 +458,11 @@ class DocToSkillConverter: # Strip anchor fragments full_url = full_url.split("#")[0] # Only include .md URLs to avoid client-side rendered HTML pages - if ".md" in full_url and self.is_valid_url(full_url) and full_url not in page["links"]: + if ( + ".md" in full_url + and self.is_valid_url(full_url) + and full_url not in page["links"] + ): page["links"].append(full_url) return page @@ -508,14 +526,18 @@ class DocToSkillConverter: for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]): text = self.clean_text(h.get_text()) if text: - page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")}) + page["headings"].append( + {"level": h.name, "text": text, "id": h.get("id", "")} + ) # Extract code blocks for code_elem in main.select("pre code, pre"): code = code_elem.get_text() if len(code.strip()) > 10: lang = self.detect_language(code_elem, code) - page["code_samples"].append({"code": code.strip(), "language": lang}) + page["code_samples"].append( + {"code": code.strip(), "language": lang} + ) # Extract paragraphs paragraphs = [] @@ -536,7 +558,9 @@ class DocToSkillConverter: # Log low-confidence detections for debugging if confidence < 0.5: - logger.debug(f"Low confidence language detection: {lang} ({confidence:.2f})") + logger.debug( + f"Low confidence language detection: {lang} ({confidence:.2f})" + ) return lang # Return string for backward compatibility @@ -549,7 +573,10 @@ class DocToSkillConverter: # Look for "Example:" or "Pattern:" sections for elem in main.find_all(["p", "div"]): text = elem.get_text().lower() - if any(word in text for word in ["example:", "pattern:", "usage:", "typical use"]): + if any( + word in text + for word in ["example:", "pattern:", "usage:", "typical use"] + ): # Get the code that follows next_code = elem.find_next(["pre", "code"]) if next_code: @@ -571,7 +598,9 @@ class DocToSkillConverter: """Save page data (skip pages with empty content)""" # Skip pages with empty or very short content if not page.get("content") or len(page.get("content", "")) < 50: - logger.debug("Skipping page with empty/short content: %s", page.get("url", "unknown")) + logger.debug( + "Skipping page with empty/short content: %s", page.get("url", "unknown") + ) return url_hash = hashlib.md5(page["url"].encode()).hexdigest()[:10] @@ -619,7 +648,10 @@ class DocToSkillConverter: # Add new URLs for link in page["links"]: - if link not in self.visited_urls and link not in self.pending_urls: + if ( + link not in self.visited_urls + and link not in self.pending_urls + ): self.pending_urls.append(link) else: # Single-threaded mode (no lock needed) @@ -640,7 +672,9 @@ class DocToSkillConverter: except Exception as e: if self.workers > 1: with self.lock: - logger.error(" āœ— Error scraping %s: %s: %s", url, type(e).__name__, e) + logger.error( + " āœ— Error scraping %s: %s: %s", url, type(e).__name__, e + ) else: logger.error(" āœ— Error scraping page: %s: %s", type(e).__name__, e) logger.error(" URL: %s", url) @@ -715,7 +749,8 @@ class DocToSkillConverter: md_urls.append(md_url) logger.info( - " āœ“ Converted %d URLs to .md format (will validate during crawl)", len(md_urls) + " āœ“ Converted %d URLs to .md format (will validate during crawl)", + len(md_urls), ) return md_urls @@ -757,7 +792,9 @@ class DocToSkillConverter: # Check for explicit config URL first explicit_url = self.config.get("llms_txt_url") if explicit_url: - logger.info("\nšŸ“Œ Using explicit llms_txt_url from config: %s", explicit_url) + logger.info( + "\nšŸ“Œ Using explicit llms_txt_url from config: %s", explicit_url + ) # Download explicit file first downloader = LlmsTxtDownloader(explicit_url) @@ -779,7 +816,8 @@ class DocToSkillConverter: if variants: logger.info( - "\nšŸ” Found %d total variant(s), downloading remaining...", len(variants) + "\nšŸ” Found %d total variant(s), downloading remaining...", + len(variants), ) for variant_info in variants: url = variant_info["url"] @@ -800,7 +838,11 @@ class DocToSkillConverter: ) with open(extra_filepath, "w", encoding="utf-8") as f: f.write(extra_content) - logger.info(" āœ“ %s (%d chars)", extra_filename, len(extra_content)) + logger.info( + " āœ“ %s (%d chars)", + extra_filename, + len(extra_content), + ) # Parse explicit file for skill building parser = LlmsTxtParser(content, self.base_url) @@ -822,7 +864,8 @@ class DocToSkillConverter: self.pending_urls.append(url) logger.info( - " šŸ“‹ %d URLs added to crawl queue after filtering", len(self.pending_urls) + " šŸ“‹ %d URLs added to crawl queue after filtering", + len(self.pending_urls), ) # Return False to trigger HTML scraping with the populated pending_urls @@ -872,7 +915,9 @@ class DocToSkillConverter: logger.info(" āœ“ %s (%d chars)", filename, len(content)) if not downloaded: - logger.warning("āš ļø Failed to download any variants, falling back to HTML scraping") + logger.warning( + "āš ļø Failed to download any variants, falling back to HTML scraping" + ) return False # Save ALL variants to references/ @@ -906,7 +951,10 @@ class DocToSkillConverter: if self.is_valid_url(url) and url not in self.visited_urls: self.pending_urls.append(url) - logger.info(" šŸ“‹ %d URLs added to crawl queue after filtering", len(self.pending_urls)) + logger.info( + " šŸ“‹ %d URLs added to crawl queue after filtering", + len(self.pending_urls), + ) # Return False to trigger HTML scraping with the populated pending_urls self.llms_txt_detected = True @@ -947,7 +995,8 @@ class DocToSkillConverter: llms_result = self._try_llms_txt() if llms_result: logger.info( - "\nāœ… Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant + "\nāœ… Used llms.txt (%s) - skipping HTML scraping", + self.llms_txt_variant, ) self.save_summary() return @@ -983,7 +1032,9 @@ class DocToSkillConverter: # Single-threaded mode (original sequential logic) if self.workers <= 1: - while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit): + while self.pending_urls and ( + unlimited or len(self.visited_urls) < preview_limit + ): url = self.pending_urls.popleft() if url in self.visited_urls: @@ -995,7 +1046,9 @@ class DocToSkillConverter: # Just show what would be scraped logger.info(" [Preview] %s", url) try: - headers = {"User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)"} + headers = { + "User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)" + } response = requests.get(url, headers=headers, timeout=10) soup = BeautifulSoup(response.content, "html.parser") @@ -1007,11 +1060,16 @@ class DocToSkillConverter: if main: for link in main.find_all("a", href=True): href = urljoin(url, link["href"]) - if self.is_valid_url(href) and href not in self.visited_urls: + if ( + self.is_valid_url(href) + and href not in self.visited_urls + ): self.pending_urls.append(href) except Exception as e: # Failed to extract links in fast mode, continue anyway - logger.warning("āš ļø Warning: Could not extract links from %s: %s", url, e) + logger.warning( + "āš ļø Warning: Could not extract links from %s: %s", url, e + ) else: self.scrape_page(url) self.pages_scraped += 1 @@ -1034,7 +1092,9 @@ class DocToSkillConverter: with ThreadPoolExecutor(max_workers=self.workers) as executor: futures = [] - while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit): + while self.pending_urls and ( + unlimited or len(self.visited_urls) < preview_limit + ): # Get next batch of URLs (thread-safe) batch = [] batch_size = min(self.workers * 2, len(self.pending_urls)) @@ -1092,9 +1152,14 @@ class DocToSkillConverter: self.pages_scraped += 1 if self.dry_run: - logger.info("\nāœ… Dry run complete: would scrape ~%d pages", len(self.visited_urls)) + logger.info( + "\nāœ… Dry run complete: would scrape ~%d pages", len(self.visited_urls) + ) if len(self.visited_urls) >= preview_limit: - logger.info(" (showing first %d, actual scraping may find more)", preview_limit) + logger.info( + " (showing first %d, actual scraping may find more)", + preview_limit, + ) logger.info("\nšŸ’” To actually scrape, run without --dry-run") else: logger.info("\nāœ… Scraped %d pages", len(self.visited_urls)) @@ -1114,7 +1179,8 @@ class DocToSkillConverter: llms_result = self._try_llms_txt() if llms_result: logger.info( - "\nāœ… Used llms.txt (%s) - skipping HTML scraping", self.llms_txt_variant + "\nāœ… Used llms.txt (%s) - skipping HTML scraping", + self.llms_txt_variant, ) self.save_summary() return @@ -1155,7 +1221,9 @@ class DocToSkillConverter: ) as client: tasks = [] - while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit): + while self.pending_urls and ( + unlimited or len(self.visited_urls) < preview_limit + ): # Get next batch of URLs batch = [] batch_size = min(self.workers * 2, len(self.pending_urls)) @@ -1191,7 +1259,11 @@ class DocToSkillConverter: logger.info(" [%d pages scraped]", self.pages_scraped) # Checkpoint saving - if not self.dry_run and self.checkpoint_enabled and self.pages_scraped % self.checkpoint_interval == 0: + if ( + not self.dry_run + and self.checkpoint_enabled + and self.pages_scraped % self.checkpoint_interval == 0 + ): self.save_checkpoint() # Wait for any remaining tasks @@ -1199,10 +1271,13 @@ class DocToSkillConverter: await asyncio.gather(*tasks, return_exceptions=True) if self.dry_run: - logger.info("\nāœ… Dry run complete: would scrape ~%d pages", len(self.visited_urls)) + logger.info( + "\nāœ… Dry run complete: would scrape ~%d pages", len(self.visited_urls) + ) if len(self.visited_urls) >= preview_limit: logger.info( - " (showing first %d, actual scraping may find more)", int(preview_limit) + " (showing first %d, actual scraping may find more)", + int(preview_limit), ) logger.info("\nšŸ’” To actually scrape, run without --dry-run") else: @@ -1237,7 +1312,10 @@ class DocToSkillConverter: pages.append(json.load(f)) except Exception as e: logger.error( - "āš ļø Error loading scraped data file %s: %s: %s", json_file, type(e).__name__, e + "āš ļø Error loading scraped data file %s: %s: %s", + json_file, + type(e).__name__, + e, ) logger.error( " Suggestion: File may be corrupted, consider re-scraping with --fresh" @@ -1245,7 +1323,9 @@ class DocToSkillConverter: return pages - def smart_categorize(self, pages: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]: + def smart_categorize( + self, pages: list[dict[str, Any]] + ) -> dict[str, list[dict[str, Any]]]: """Improved categorization with better pattern matching""" category_defs = self.config.get("categories", {}) @@ -1297,14 +1377,18 @@ class DocToSkillConverter: for page in pages: path = urlparse(page["url"]).path segments = [ - s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"] + s + for s in path.split("/") + if s and s not in ["en", "stable", "latest", "docs"] ] for seg in segments: url_segments[seg] += 1 # Top segments become categories - top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[:8] + top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[ + :8 + ] categories = {} for seg, count in top_segments: @@ -1324,7 +1408,9 @@ class DocToSkillConverter: return categories - def generate_quick_reference(self, pages: list[dict[str, Any]]) -> list[dict[str, str]]: + def generate_quick_reference( + self, pages: list[dict[str, Any]] + ) -> list[dict[str, str]]: """Generate quick reference from common patterns (NEW FEATURE)""" quick_ref = [] @@ -1393,7 +1479,9 @@ class DocToSkillConverter: logger.info(" āœ“ %s.md (%d pages)", category, len(pages)) def create_enhanced_skill_md( - self, categories: dict[str, list[dict[str, Any]]], quick_ref: list[dict[str, str]] + self, + categories: dict[str, list[dict[str, Any]]], + quick_ref: list[dict[str, str]], ) -> None: """Create SKILL.md with actual examples (IMPROVED)""" # Try to infer description if not in config @@ -1404,7 +1492,9 @@ class DocToSkillConverter: if pages: first_page_html = pages[0].get("raw_html", "") break - description = infer_description_from_docs(self.base_url, first_page_html, self.name) + description = infer_description_from_docs( + self.base_url, first_page_html, self.name + ) else: description = self.config["description"] @@ -1412,7 +1502,9 @@ class DocToSkillConverter: example_codes = [] for pages in categories.values(): for page in pages[:3]: # First 3 pages per category - for sample in page.get("code_samples", [])[:2]: # First 2 samples per page + for sample in page.get("code_samples", [])[ + :2 + ]: # First 2 samples per page code = sample.get("code", sample if isinstance(sample, str) else "") lang = sample.get("language", "unknown") if len(code) < 200 and lang != "unknown": @@ -1462,7 +1554,9 @@ This skill should be triggered when: content += pattern.get("code", "")[:300] content += "\n```\n\n" else: - content += "*Quick reference patterns will be added as you use the skill.*\n\n" + content += ( + "*Quick reference patterns will be added as you use the skill.*\n\n" + ) # Add example codes from docs if example_codes: @@ -1477,7 +1571,9 @@ This skill includes comprehensive documentation in `references/`: """ for cat in sorted(categories.keys()): - content += f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n" + content += ( + f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n" + ) content += """ Use `view` to read specific reference files when detailed information is needed. @@ -1625,7 +1721,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]: ) # Validate base_url - if "base_url" in config and not config["base_url"].startswith(("http://", "https://")): + if "base_url" in config and not config["base_url"].startswith( + ("http://", "https://") + ): errors.append( f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)" ) @@ -1648,7 +1746,9 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]: errors.append("'url_patterns' must be a dictionary") else: for key in ["include", "exclude"]: - if key in config["url_patterns"] and not isinstance(config["url_patterns"][key], list): + if key in config["url_patterns"] and not isinstance( + config["url_patterns"][key], list + ): errors.append(f"'url_patterns.{key}' must be a list") # Validate categories @@ -1740,12 +1840,18 @@ def load_config(config_path: str) -> dict[str, Any]: except json.JSONDecodeError as e: logger.error("āŒ Error: Invalid JSON in config file: %s", config_path) logger.error(" Details: %s", e) - logger.error(" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno) + logger.error( + " Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno + ) sys.exit(1) except FileNotFoundError: logger.error("āŒ Error: Config file not found: %s", config_path) - logger.error(" Suggestion: Create a config file or use an existing one from configs/") - logger.error(" Available configs: react.json, vue.json, django.json, godot.json") + logger.error( + " Suggestion: Create a config file or use an existing one from configs/" + ) + logger.error( + " Available configs: react.json, vue.json, django.json, godot.json" + ) sys.exit(1) # Validate config @@ -1763,7 +1869,9 @@ def load_config(config_path: str) -> dict[str, Any]: logger.error("āŒ Configuration validation errors in %s:", config_path) for error in errors: logger.error(" - %s", error) - logger.error("\n Suggestion: Fix the above errors or check configs/ for working examples") + logger.error( + "\n Suggestion: Fix the above errors or check configs/ for working examples" + ) sys.exit(1) return config @@ -1870,10 +1978,16 @@ def setup_argument_parser() -> argparse.ArgumentParser: ) parser.add_argument( - "--interactive", "-i", action="store_true", help="Interactive configuration mode" + "--interactive", + "-i", + action="store_true", + help="Interactive configuration mode", ) parser.add_argument( - "--config", "-c", type=str, help="Load configuration from file (e.g., configs/godot.json)" + "--config", + "-c", + type=str, + help="Load configuration from file (e.g., configs/godot.json)", ) parser.add_argument("--name", type=str, help="Skill name") parser.add_argument("--url", type=str, help="Base documentation URL") @@ -1902,14 +2016,18 @@ def setup_argument_parser() -> argparse.ArgumentParser: help="Open terminal window for enhancement (use with --enhance-local)", ) parser.add_argument( - "--api-key", type=str, help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)" + "--api-key", + type=str, + help="Anthropic API key for --enhance (or set ANTHROPIC_API_KEY)", ) parser.add_argument( "--resume", action="store_true", help="Resume from last checkpoint (for interrupted scrapes)", ) - parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh") + parser.add_argument( + "--fresh", action="store_true", help="Clear checkpoint and start fresh" + ) parser.add_argument( "--rate-limit", "-r", @@ -1936,10 +2054,16 @@ def setup_argument_parser() -> argparse.ArgumentParser: help="Disable rate limiting completely (same as --rate-limit 0)", ) parser.add_argument( - "--verbose", "-v", action="store_true", help="Enable verbose output (DEBUG level logging)" + "--verbose", + "-v", + action="store_true", + help="Enable verbose output (DEBUG level logging)", ) parser.add_argument( - "--quiet", "-q", action="store_true", help="Minimize output (WARNING level logging only)" + "--quiet", + "-q", + action="store_true", + help="Minimize output (WARNING level logging only)", ) return parser @@ -2002,11 +2126,15 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]: if args.workers: # Validate workers count if args.workers < 1: - logger.error("āŒ Error: --workers must be at least 1 (got %d)", args.workers) + logger.error( + "āŒ Error: --workers must be at least 1 (got %d)", args.workers + ) logger.error(" Suggestion: Use --workers 1 (default) or omit the flag") sys.exit(1) if args.workers > 10: - logger.warning("āš ļø Warning: --workers capped at 10 (requested %d)", args.workers) + logger.warning( + "āš ļø Warning: --workers capped at 10 (requested %d)", args.workers + ) args.workers = 10 config["workers"] = args.workers if args.workers > 1: @@ -2160,7 +2288,11 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non logger.info("=" * 60 + "\n") try: - enhance_cmd = ["python3", "cli/enhance_skill.py", f"output/{config['name']}/"] + enhance_cmd = [ + "python3", + "cli/enhance_skill.py", + f"output/{config['name']}/", + ] if args.api_key: enhance_cmd.extend(["--api-key", args.api_key]) @@ -2204,9 +2336,14 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non # Suggest enhancement if not done if not args.enhance and not args.enhance_local: logger.info("\nšŸ’” Optional: Enhance SKILL.md with Claude:") - logger.info(" Local (recommended): skill-seekers-enhance output/%s/", config["name"]) + logger.info( + " Local (recommended): skill-seekers-enhance output/%s/", config["name"] + ) logger.info(" or re-run with: --enhance-local") - logger.info(" API-based: skill-seekers-enhance-api output/%s/", config["name"]) + logger.info( + " API-based: skill-seekers-enhance-api output/%s/", + config["name"], + ) logger.info(" or re-run with: --enhance") logger.info( "\nšŸ’” Tip: Use --interactive-enhancement with --enhance-local to open terminal window" diff --git a/src/skill_seekers/cli/how_to_guide_builder.py b/src/skill_seekers/cli/how_to_guide_builder.py index ee908c6..070e7b8 100644 --- a/src/skill_seekers/cli/how_to_guide_builder.py +++ b/src/skill_seekers/cli/how_to_guide_builder.py @@ -79,7 +79,9 @@ class WorkflowStep: setup_required: str | None = None explanation: str | None = None # Why this step matters common_pitfall: str | None = None # Warning for this step - common_variations: list[str] = field(default_factory=list) # AI: Alternative approaches + common_variations: list[str] = field( + default_factory=list + ) # AI: Alternative approaches @dataclass @@ -221,7 +223,9 @@ class WorkflowAnalyzer: # Check if next statement is assertion (verification) idx = statements.index(stmt) verification = None - if idx + 1 < len(statements) and isinstance(statements[idx + 1], ast.Assert): + if idx + 1 < len(statements) and isinstance( + statements[idx + 1], ast.Assert + ): verification = ast.get_source_segment(code, statements[idx + 1]) steps.append( @@ -240,7 +244,9 @@ class WorkflowAnalyzer: return steps - def _extract_steps_heuristic(self, code: str, _workflow: dict) -> list[WorkflowStep]: + def _extract_steps_heuristic( + self, code: str, _workflow: dict + ) -> list[WorkflowStep]: """Extract steps using heuristics (for non-Python or invalid syntax)""" steps = [] lines = code.split("\n") @@ -259,7 +265,11 @@ class WorkflowAnalyzer: description = self._infer_description_from_code(step_code) steps.append( - WorkflowStep(step_number=step_num, code=step_code, description=description) + WorkflowStep( + step_number=step_num, + code=step_code, + description=description, + ) ) step_num += 1 current_step = [] @@ -272,7 +282,9 @@ class WorkflowAnalyzer: step_code = "\n".join(current_step) description = self._infer_description_from_code(step_code) steps.append( - WorkflowStep(step_number=step_num, code=step_code, description=description) + WorkflowStep( + step_number=step_num, code=step_code, description=description + ) ) return steps @@ -336,7 +348,11 @@ class WorkflowAnalyzer: def _detect_prerequisites(self, workflow: dict) -> dict: """Detect prerequisites from workflow""" - metadata = {"prerequisites": [], "required_imports": [], "required_fixtures": []} + metadata = { + "prerequisites": [], + "required_imports": [], + "required_fixtures": [], + } # Get dependencies from workflow dependencies = workflow.get("dependencies", []) @@ -438,7 +454,9 @@ class WorkflowGrouper: groups = self._group_by_file_path(workflows) return groups - def _group_by_ai_tutorial_group(self, workflows: list[dict]) -> dict[str, list[dict]]: + def _group_by_ai_tutorial_group( + self, workflows: list[dict] + ) -> dict[str, list[dict]]: """Group by AI-generated tutorial_group (from C3.6 enhancement)""" groups = defaultdict(list) ungrouped = [] @@ -866,7 +884,10 @@ class HowToGuideBuilder: if not workflows: logger.warning("No workflow examples found!") return GuideCollection( - total_guides=0, guides_by_complexity={}, guides_by_use_case={}, guides=[] + total_guides=0, + guides_by_complexity={}, + guides_by_use_case={}, + guides=[], ) # Group workflows @@ -893,7 +914,9 @@ class HowToGuideBuilder: """Filter to workflow category only""" return [ex for ex in examples if ex.get("category") == "workflow"] - def _create_guide(self, title: str, workflows: list[dict], enhancer=None) -> HowToGuide: + def _create_guide( + self, title: str, workflows: list[dict], enhancer=None + ) -> HowToGuide: """ Generate single guide from workflow(s). @@ -928,7 +951,8 @@ class HowToGuideBuilder: # Extract source files source_files = [w.get("file_path", "") for w in workflows] source_files = [ - f"{Path(f).name}:{w.get('line_start', 0)}" for f, w in zip(source_files, workflows, strict=False) + f"{Path(f).name}:{w.get('line_start', 0)}" + for f, w in zip(source_files, workflows, strict=False) ] # Create guide @@ -950,14 +974,18 @@ class HowToGuideBuilder: # Add AI enhancements if enhancer is available if enhancer: - self._enhance_guide_with_ai(guide, primary_workflow.get("ai_analysis", {}), enhancer) + self._enhance_guide_with_ai( + guide, primary_workflow.get("ai_analysis", {}), enhancer + ) elif self.enhance_with_ai and primary_workflow.get("ai_analysis"): # Fallback to old enhancement method (basic) self._enhance_guide_with_ai_basic(guide, primary_workflow["ai_analysis"]) return guide - def _generate_overview(self, primary_workflow: dict, _all_workflows: list[dict]) -> str: + def _generate_overview( + self, primary_workflow: dict, _all_workflows: list[dict] + ) -> str: """Generate guide overview""" # Try to get explanation from AI analysis if primary_workflow.get("ai_analysis"): @@ -991,7 +1019,10 @@ class HowToGuideBuilder: # Prepare guide data for enhancer guide_data = { "title": guide.title, - "steps": [{"description": step.description, "code": step.code} for step in guide.steps], + "steps": [ + {"description": step.description, "code": step.code} + for step in guide.steps + ], "language": "python", # TODO: Detect from code "prerequisites": guide.prerequisites, "description": guide.overview, @@ -1024,7 +1055,9 @@ class HowToGuideBuilder: if "use_cases" in enhanced_data: guide.use_cases = enhanced_data["use_cases"] - logger.info(f"✨ Enhanced guide '{guide.title}' with comprehensive AI improvements") + logger.info( + f"✨ Enhanced guide '{guide.title}' with comprehensive AI improvements" + ) def _enhance_guide_with_ai_basic(self, guide: HowToGuide, ai_analysis: dict): """ @@ -1089,7 +1122,9 @@ class HowToGuideBuilder: for guide in guides: # Generate filename from title - filename = guide.title.lower().replace(" ", "-").replace(":", "") + ".md" + filename = ( + guide.title.lower().replace(" ", "-").replace(":", "") + ".md" + ) file_path = use_case_dir / filename # Generate and save markdown @@ -1100,7 +1135,9 @@ class HowToGuideBuilder: index_markdown = self.generator.generate_index(collection.guides) (output_dir / "index.md").write_text(index_markdown, encoding="utf-8") - logger.info(f"āœ… Saved {collection.total_guides} guides + index to {output_dir}") + logger.info( + f"āœ… Saved {collection.total_guides} guides + index to {output_dir}" + ) # ============================================================================ @@ -1142,11 +1179,15 @@ Grouping Strategies: ) parser.add_argument( - "input", nargs="?", help="Input: directory with test files OR test_examples.json file" + "input", + nargs="?", + help="Input: directory with test files OR test_examples.json file", ) parser.add_argument( - "--input", dest="input_file", help="Input JSON file with test examples (from C3.2)" + "--input", + dest="input_file", + help="Input JSON file with test examples (from C3.2)", ) parser.add_argument( @@ -1165,7 +1206,9 @@ Grouping Strategies: parser.add_argument("--no-ai", action="store_true", help="Disable AI enhancement") parser.add_argument( - "--json-output", action="store_true", help="Output JSON summary instead of markdown files" + "--json-output", + action="store_true", + help="Output JSON summary instead of markdown files", ) args = parser.parse_args() @@ -1201,7 +1244,9 @@ Grouping Strategies: # Extract from directory using test example extractor print("āš ļø Directory input requires test example extractor") print(" Please use test_examples.json output from C3.2") - print(f" Or run: skill-seekers extract-test-examples {input_path} --json > examples.json") + print( + f" Or run: skill-seekers extract-test-examples {input_path} --json > examples.json" + ) sys.exit(1) else: diff --git a/src/skill_seekers/cli/llms_txt_downloader.py b/src/skill_seekers/cli/llms_txt_downloader.py index ee42f0f..eaa3acc 100644 --- a/src/skill_seekers/cli/llms_txt_downloader.py +++ b/src/skill_seekers/cli/llms_txt_downloader.py @@ -1,6 +1,5 @@ """ABOUTME: Downloads llms.txt files from documentation URLs with retry logic""" - import time import requests diff --git a/src/skill_seekers/cli/llms_txt_parser.py b/src/skill_seekers/cli/llms_txt_parser.py index 73df29f..21b1612 100644 --- a/src/skill_seekers/cli/llms_txt_parser.py +++ b/src/skill_seekers/cli/llms_txt_parser.py @@ -1,6 +1,5 @@ """ABOUTME: Parses llms.txt markdown content into structured page data""" - import re from urllib.parse import urljoin @@ -128,7 +127,9 @@ class LlmsTxtParser: # Extract code blocks code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL) for lang, code in code_blocks: - page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"}) + page["code_samples"].append( + {"code": code.strip(), "language": lang or "unknown"} + ) # Extract h2/h3 headings headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE) @@ -145,7 +146,9 @@ class LlmsTxtParser: content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL) # Extract paragraphs - paragraphs = [p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20] + paragraphs = [ + p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20 + ] page["content"] = "\n\n".join(paragraphs) return page diff --git a/src/skill_seekers/cli/pattern_recognizer.py b/src/skill_seekers/cli/pattern_recognizer.py index eb1e1bd..e827e59 100644 --- a/src/skill_seekers/cli/pattern_recognizer.py +++ b/src/skill_seekers/cli/pattern_recognizer.py @@ -237,7 +237,9 @@ class PatternRecognizer: self.detectors.append(TemplateMethodDetector(self.depth)) self.detectors.append(ChainOfResponsibilityDetector(self.depth)) - def analyze_file(self, file_path: str, content: str, language: str) -> PatternReport: + def analyze_file( + self, file_path: str, content: str, language: str + ) -> PatternReport: """ Analyze a single file for design patterns. @@ -428,7 +430,9 @@ class SingletonDetector(BasePatternDetector): # Python: __init__ or __new__ # Java/C#: private constructor (detected by naming) # Check if it has logic (not just pass) - if method.name in ["__new__", "__init__", "constructor"] and (method.docstring or len(method.parameters) > 1): + if method.name in ["__new__", "__init__", "constructor"] and ( + method.docstring or len(method.parameters) > 1 + ): evidence.append(f"Controlled initialization: {method.name}") confidence += 0.3 has_init_control = True @@ -535,17 +539,19 @@ class FactoryDetector(BasePatternDetector): for method in class_sig.methods: method_lower = method.name.lower() # Check if method returns something (has return type or is not void) - if any(name in method_lower for name in factory_method_names) and (method.return_type or "create" in method_lower): + if any(name in method_lower for name in factory_method_names) and ( + method.return_type or "create" in method_lower + ): return PatternInstance( - pattern_type=self.pattern_type, - category=self.category, - confidence=0.6, - location="", - class_name=class_sig.name, - method_name=method.name, - line_number=method.line_number, - evidence=[f"Factory method detected: {method.name}"], - ) + pattern_type=self.pattern_type, + category=self.category, + confidence=0.6, + location="", + class_name=class_sig.name, + method_name=method.name, + line_number=method.line_number, + evidence=[f"Factory method detected: {method.name}"], + ) return None @@ -575,7 +581,9 @@ class FactoryDetector(BasePatternDetector): # Check if multiple factory methods exist (Abstract Factory pattern) if len(factory_methods) >= 2: - evidence.append(f"Multiple factory methods: {', '.join(factory_methods[:3])}") + evidence.append( + f"Multiple factory methods: {', '.join(factory_methods[:3])}" + ) confidence += 0.2 # Check for inheritance (factory hierarchy) @@ -682,7 +690,13 @@ class ObserverDetector(BasePatternDetector): has_notify = False attach_names = ["attach", "add", "subscribe", "register", "addeventlistener"] - detach_names = ["detach", "remove", "unsubscribe", "unregister", "removeeventlistener"] + detach_names = [ + "detach", + "remove", + "unsubscribe", + "unregister", + "removeeventlistener", + ] notify_names = ["notify", "update", "emit", "publish", "fire", "trigger"] for method in class_sig.methods: @@ -786,25 +800,35 @@ class StrategyDetector(BasePatternDetector): ] if siblings: - evidence.append(f"Part of strategy family with: {', '.join(siblings[:3])}") + evidence.append( + f"Part of strategy family with: {', '.join(siblings[:3])}" + ) confidence += 0.5 - if base_class and ("strategy" in base_class.lower() or "policy" in base_class.lower()): + if base_class and ( + "strategy" in base_class.lower() or "policy" in base_class.lower() + ): evidence.append(f"Inherits from strategy base: {base_class}") confidence += 0.3 # Check if this is a strategy base class # (has subclasses in same file) - subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes] + subclasses = [ + cls.name for cls in all_classes if class_sig.name in cls.base_classes + ] if len(subclasses) >= 2: - evidence.append(f"Strategy base with implementations: {', '.join(subclasses[:3])}") + evidence.append( + f"Strategy base with implementations: {', '.join(subclasses[:3])}" + ) confidence += 0.6 # Check for single dominant method (strategy interface) if len(class_sig.methods) == 1 or len(class_sig.methods) == 2: # Single method or method + __init__ - main_method = [m for m in class_sig.methods if m.name not in ["__init__", "__new__"]] + main_method = [ + m for m in class_sig.methods if m.name not in ["__init__", "__new__"] + ] if main_method: evidence.append(f"Strategy interface method: {main_method[0].name}") confidence += 0.2 @@ -1274,7 +1298,9 @@ class TemplateMethodDetector(BasePatternDetector): class_lower = class_sig.name.lower() if any(keyword in class_lower for keyword in template_keywords): # Check if has subclasses - subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes] + subclasses = [ + cls.name for cls in all_classes if class_sig.name in cls.base_classes + ] if subclasses: return PatternInstance( @@ -1284,7 +1310,9 @@ class TemplateMethodDetector(BasePatternDetector): location="", class_name=class_sig.name, line_number=class_sig.line_number, - evidence=[f"Abstract base with subclasses: {', '.join(subclasses[:2])}"], + evidence=[ + f"Abstract base with subclasses: {', '.join(subclasses[:2])}" + ], related_classes=subclasses, ) @@ -1301,7 +1329,9 @@ class TemplateMethodDetector(BasePatternDetector): # 3. Has template method that orchestrates # Check for subclasses - subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes] + subclasses = [ + cls.name for cls in all_classes if class_sig.name in cls.base_classes + ] if len(subclasses) >= 1: evidence.append(f"Base class with {len(subclasses)} implementations") @@ -1437,7 +1467,8 @@ class ChainOfResponsibilityDetector(BasePatternDetector): # Check for set_next() method has_set_next = any( - "next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower()) + "next" in m.name.lower() + and ("set" in m.name.lower() or "add" in m.name.lower()) for m in class_sig.methods ) @@ -1458,7 +1489,9 @@ class ChainOfResponsibilityDetector(BasePatternDetector): ] if siblings and has_next_ref: - evidence.append(f"Part of handler chain with: {', '.join(siblings[:2])}") + evidence.append( + f"Part of handler chain with: {', '.join(siblings[:2])}" + ) confidence += 0.2 if confidence >= 0.5: @@ -1515,7 +1548,11 @@ class LanguageAdapter: pattern.confidence = min(pattern.confidence + 0.1, 1.0) # Strategy: Duck typing common in Python - elif pattern.pattern_type == "Strategy" and "duck typing" in evidence_str or "protocol" in evidence_str: + elif ( + pattern.pattern_type == "Strategy" + and "duck typing" in evidence_str + or "protocol" in evidence_str + ): pattern.confidence = min(pattern.confidence + 0.05, 1.0) # JavaScript/TypeScript adaptations @@ -1532,7 +1569,11 @@ class LanguageAdapter: pattern.confidence = min(pattern.confidence + 0.05, 1.0) # Observer: Event emitters are built-in - elif pattern.pattern_type == "Observer" and "eventemitter" in evidence_str or "event" in evidence_str: + elif ( + pattern.pattern_type == "Observer" + and "eventemitter" in evidence_str + or "event" in evidence_str + ): pattern.confidence = min(pattern.confidence + 0.1, 1.0) pattern.evidence.append("EventEmitter pattern detected") @@ -1549,7 +1590,9 @@ class LanguageAdapter: pattern.evidence.append("Abstract Factory pattern") # Template Method: Abstract classes common - elif pattern.pattern_type == "TemplateMethod" and "abstract" in evidence_str: + elif ( + pattern.pattern_type == "TemplateMethod" and "abstract" in evidence_str + ): pattern.confidence = min(pattern.confidence + 0.1, 1.0) # Go adaptations @@ -1602,7 +1645,9 @@ class LanguageAdapter: pattern.evidence.append("Ruby Singleton module") # Builder: Method chaining is idiomatic - elif pattern.pattern_type == "Builder" and "method chaining" in evidence_str: + elif ( + pattern.pattern_type == "Builder" and "method chaining" in evidence_str + ): pattern.confidence = min(pattern.confidence + 0.05, 1.0) # PHP adaptations @@ -1653,9 +1698,13 @@ Supported Languages: ) parser.add_argument( - "--file", action="append", help="Source file to analyze (can be specified multiple times)" + "--file", + action="append", + help="Source file to analyze (can be specified multiple times)", + ) + parser.add_argument( + "--directory", help="Directory to analyze (analyzes all source files)" ) - parser.add_argument("--directory", help="Directory to analyze (analyzes all source files)") parser.add_argument( "--output", help="Output directory for results (default: current directory)" ) @@ -1666,7 +1715,9 @@ Supported Languages: help="Detection depth: surface (fast), deep (default), full (thorough)", ) parser.add_argument( - "--json", action="store_true", help="Output JSON format instead of human-readable" + "--json", + action="store_true", + help="Output JSON format instead of human-readable", ) parser.add_argument("--verbose", action="store_true", help="Enable verbose output") diff --git a/src/skill_seekers/cli/test_example_extractor.py b/src/skill_seekers/cli/test_example_extractor.py index cebf309..0ec427c 100644 --- a/src/skill_seekers/cli/test_example_extractor.py +++ b/src/skill_seekers/cli/test_example_extractor.py @@ -194,11 +194,15 @@ class PythonTestAnalyzer: for node in ast.walk(tree): if isinstance(node, ast.ClassDef): if self._is_test_class(node): - examples.extend(self._extract_from_test_class(node, file_path, imports)) + examples.extend( + self._extract_from_test_class(node, file_path, imports) + ) # Find test functions (pytest) elif isinstance(node, ast.FunctionDef) and self._is_test_function(node): - examples.extend(self._extract_from_test_function(node, file_path, imports)) + examples.extend( + self._extract_from_test_function(node, file_path, imports) + ) return examples @@ -232,7 +236,9 @@ class PythonTestAnalyzer: return True # Has @pytest.mark decorator for decorator in node.decorator_list: - if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse(decorator): + if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse( + decorator + ): return True return False @@ -249,7 +255,9 @@ class PythonTestAnalyzer: for node in class_node.body: if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"): examples.extend( - self._analyze_test_body(node, file_path, imports, setup_code=setup_code) + self._analyze_test_body( + node, file_path, imports, setup_code=setup_code + ) ) return examples @@ -261,7 +269,9 @@ class PythonTestAnalyzer: # Check for fixture parameters fixture_setup = self._extract_fixtures(func_node) - return self._analyze_test_body(func_node, file_path, imports, setup_code=fixture_setup) + return self._analyze_test_body( + func_node, file_path, imports, setup_code=fixture_setup + ) def _extract_setup_method(self, class_node: ast.ClassDef) -> str | None: """Extract setUp method code""" @@ -318,7 +328,9 @@ class PythonTestAnalyzer: examples.extend(configs) # 4. Multi-step workflows (integration tests) - workflows = self._find_workflows(func_node, file_path, docstring, setup_code, tags, imports) + workflows = self._find_workflows( + func_node, file_path, docstring, setup_code, tags, imports + ) examples.extend(workflows) return examples @@ -362,7 +374,11 @@ class PythonTestAnalyzer: for node in ast.walk(func_node): # Check if meaningful instantiation - if isinstance(node, ast.Assign) and isinstance(node.value, ast.Call) and self._is_meaningful_instantiation(node): + if ( + isinstance(node, ast.Assign) + and isinstance(node.value, ast.Call) + and self._is_meaningful_instantiation(node) + ): code = ast.unparse(node) # Skip trivial or mock-only @@ -408,7 +424,11 @@ class PythonTestAnalyzer: statements = func_node.body for i, stmt in enumerate(statements): # Look for method calls and check if next statement is an assertion - if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call) and i + 1 < len(statements): + if ( + isinstance(stmt, ast.Expr) + and isinstance(stmt.value, ast.Call) + and i + 1 < len(statements) + ): next_stmt = statements[i + 1] if self._is_assertion(next_stmt): method_call = ast.unparse(stmt) @@ -455,7 +475,11 @@ class PythonTestAnalyzer: for node in ast.walk(func_node): # Must have 2+ keys and be meaningful - if isinstance(node, ast.Assign) and isinstance(node.value, ast.Dict) and len(node.value.keys) >= 2: + if ( + isinstance(node, ast.Assign) + and isinstance(node.value, ast.Dict) + and len(node.value.keys) >= 2 + ): code = ast.unparse(node) # Check if looks like configuration @@ -467,7 +491,9 @@ class PythonTestAnalyzer: code=code, language="Python", description=f"Configuration example: {description}", - expected_behavior=self._extract_assertion_after(func_node, node), + expected_behavior=self._extract_assertion_after( + func_node, node + ), setup_code=setup_code, file_path=file_path, line_start=node.lineno, @@ -568,7 +594,9 @@ class PythonTestAnalyzer: integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"] return any(keyword in test_name for keyword in integration_keywords) - def _extract_assertion_after(self, func_node: ast.FunctionDef, target_node: ast.AST) -> str: + def _extract_assertion_after( + self, func_node: ast.FunctionDef, target_node: ast.AST + ) -> str: """Find assertion that follows the target node""" found_target = False for stmt in func_node.body: @@ -699,7 +727,8 @@ class GenericTestAnalyzer: code=config_match.group(0), language=language, file_path=file_path, - line_number=code[: start_pos + config_match.start()].count("\n") + 1, + line_number=code[: start_pos + config_match.start()].count("\n") + + 1, ) examples.append(example) @@ -842,7 +871,9 @@ class TestExampleExtractor: logger.warning(f"āš ļø Failed to initialize AI enhancer: {e}") self.enhance_with_ai = False - def extract_from_directory(self, directory: Path, recursive: bool = True) -> ExampleReport: + def extract_from_directory( + self, directory: Path, recursive: bool = True + ) -> ExampleReport: """Extract examples from all test files in directory""" directory = Path(directory) @@ -896,11 +927,13 @@ class TestExampleExtractor: # Limit per file if len(filtered_examples) > self.max_per_file: # Sort by confidence and take top N - filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[ - : self.max_per_file - ] + filtered_examples = sorted( + filtered_examples, key=lambda x: x.confidence, reverse=True + )[: self.max_per_file] - logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}") + logger.info( + f"Extracted {len(filtered_examples)} examples from {file_path.name}" + ) return filtered_examples @@ -955,7 +988,9 @@ class TestExampleExtractor: # Calculate averages avg_complexity = ( - sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0 + sum(ex.complexity_score for ex in examples) / len(examples) + if examples + else 0.0 ) high_value_count = sum(1 for ex in examples if ex.confidence > 0.7) @@ -1009,10 +1044,15 @@ Examples: help="Minimum confidence threshold (0.0-1.0, default: 0.5)", ) parser.add_argument( - "--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)" + "--max-per-file", + type=int, + default=10, + help="Maximum examples per file (default: 10)", ) parser.add_argument("--json", action="store_true", help="Output JSON format") - parser.add_argument("--markdown", action="store_true", help="Output Markdown format") + parser.add_argument( + "--markdown", action="store_true", help="Output Markdown format" + ) parser.add_argument( "--recursive", action="store_true", @@ -1029,7 +1069,9 @@ Examples: # Create extractor languages = [args.language] if args.language else None extractor = TestExampleExtractor( - min_confidence=args.min_confidence, max_per_file=args.max_per_file, languages=languages + min_confidence=args.min_confidence, + max_per_file=args.max_per_file, + languages=languages, ) # Extract examples @@ -1037,7 +1079,9 @@ Examples: examples = extractor.extract_from_file(Path(args.file)) report = extractor._create_report(examples, file_path=args.file) else: - report = extractor.extract_from_directory(Path(args.directory), recursive=args.recursive) + report = extractor.extract_from_directory( + Path(args.directory), recursive=args.recursive + ) # Output results if args.json: diff --git a/src/skill_seekers/cli/unified_codebase_analyzer.py b/src/skill_seekers/cli/unified_codebase_analyzer.py index e30cf95..8876531 100644 --- a/src/skill_seekers/cli/unified_codebase_analyzer.py +++ b/src/skill_seekers/cli/unified_codebase_analyzer.py @@ -95,13 +95,20 @@ class UnifiedCodebaseAnalyzer: # Step 1: Acquire source if self.is_github_url(source): print("šŸ“¦ Source type: GitHub repository") - return self._analyze_github(source, depth, fetch_github_metadata, output_dir, interactive) + return self._analyze_github( + source, depth, fetch_github_metadata, output_dir, interactive + ) else: print("šŸ“ Source type: Local directory") return self._analyze_local(source, depth) def _analyze_github( - self, repo_url: str, depth: str, fetch_metadata: bool, output_dir: Path | None, interactive: bool = True + self, + repo_url: str, + depth: str, + fetch_metadata: bool, + output_dir: Path | None, + interactive: bool = True, ) -> AnalysisResult: """ Analyze GitHub repository with three-stream fetcher. @@ -117,7 +124,9 @@ class UnifiedCodebaseAnalyzer: AnalysisResult with all 3 streams """ # Use three-stream fetcher - fetcher = GitHubThreeStreamFetcher(repo_url, self.github_token, interactive=interactive) + fetcher = GitHubThreeStreamFetcher( + repo_url, self.github_token, interactive=interactive + ) three_streams = fetcher.fetch(output_dir) # Analyze code with specified depth @@ -236,7 +245,9 @@ class UnifiedCodebaseAnalyzer: basic = self.basic_analysis(directory) # Run full C3.x analysis using existing codebase_scraper - print("šŸ” Running C3.x components (patterns, examples, guides, configs, architecture)...") + print( + "šŸ” Running C3.x components (patterns, examples, guides, configs, architecture)..." + ) try: # Import codebase analyzer @@ -271,11 +282,19 @@ class UnifiedCodebaseAnalyzer: c3x = {**basic, "analysis_type": "c3x", **c3x_data} print("āœ… C3.x analysis complete!") - print(f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected") - print(f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted") - print(f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated") + print( + f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected" + ) + print( + f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted" + ) + print( + f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated" + ) print(f" - {len(c3x_data.get('c3_4_configs', []))} config files analyzed") - print(f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found") + print( + f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found" + ) return c3x @@ -432,7 +451,9 @@ class UnifiedCodebaseAnalyzer: if item.is_dir(): # Only include immediate subdirectories - structure["children"].append({"name": item.name, "type": "directory"}) + structure["children"].append( + {"name": item.name, "type": "directory"} + ) elif item.is_file(): structure["children"].append( {"name": item.name, "type": "file", "extension": item.suffix} @@ -526,7 +547,12 @@ class UnifiedCodebaseAnalyzer: Returns: Dict with statistics """ - stats = {"total_files": 0, "total_size_bytes": 0, "file_types": {}, "languages": {}} + stats = { + "total_files": 0, + "total_size_bytes": 0, + "file_types": {}, + "languages": {}, + } for file_path in directory.rglob("*"): if not file_path.is_file(): diff --git a/tests/test_architecture_scenarios.py b/tests/test_architecture_scenarios.py index 013473c..910a767 100644 --- a/tests/test_architecture_scenarios.py +++ b/tests/test_architecture_scenarios.py @@ -29,7 +29,10 @@ from skill_seekers.cli.github_fetcher import ( ThreeStreamData, ) from skill_seekers.cli.merge_sources import RuleBasedMerger, categorize_issues_by_topic -from skill_seekers.cli.unified_codebase_analyzer import AnalysisResult, UnifiedCodebaseAnalyzer +from skill_seekers.cli.unified_codebase_analyzer import ( + AnalysisResult, + UnifiedCodebaseAnalyzer, +) class TestScenario1GitHubThreeStream: @@ -67,7 +70,8 @@ class TestScenario1GitHubThreeStream: # Create code files src_dir = repo_dir / "src" src_dir.mkdir() - (src_dir / "auth.py").write_text(""" + (src_dir / "auth.py").write_text( + """ # OAuth authentication def google_provider(client_id, client_secret): '''Google OAuth provider''' @@ -76,20 +80,24 @@ def google_provider(client_id, client_secret): def azure_provider(tenant_id, client_id): '''Azure OAuth provider''' return Provider('azure', tenant_id, client_id) -""") - (src_dir / "async_tools.py").write_text(""" +""" + ) + (src_dir / "async_tools.py").write_text( + """ import asyncio async def async_tool(): '''Async tool decorator''' await asyncio.sleep(1) return "result" -""") +""" + ) # Create test files tests_dir = repo_dir / "tests" tests_dir.mkdir() - (tests_dir / "test_auth.py").write_text(""" + (tests_dir / "test_auth.py").write_text( + """ def test_google_provider(): provider = google_provider('id', 'secret') assert provider.name == 'google' @@ -97,10 +105,12 @@ def test_google_provider(): def test_azure_provider(): provider = azure_provider('tenant', 'id') assert provider.name == 'azure' -""") +""" + ) # Create docs - (repo_dir / "README.md").write_text(""" + (repo_dir / "README.md").write_text( + """ # FastMCP FastMCP is a Python framework for building MCP servers. @@ -116,26 +126,33 @@ pip install fastmcp - OAuth authentication (Google, Azure, GitHub) - Async/await support - Easy testing with pytest -""") +""" + ) - (repo_dir / "CONTRIBUTING.md").write_text(""" + (repo_dir / "CONTRIBUTING.md").write_text( + """ # Contributing Please follow these guidelines when contributing. -""") +""" + ) docs_dir = repo_dir / "docs" docs_dir.mkdir() - (docs_dir / "oauth.md").write_text(""" + (docs_dir / "oauth.md").write_text( + """ # OAuth Guide How to set up OAuth providers. -""") - (docs_dir / "async.md").write_text(""" +""" + ) + (docs_dir / "async.md").write_text( + """ # Async Guide How to use async tools. -""") +""" + ) return repo_dir @@ -186,11 +203,15 @@ How to use async tools. ], } - def test_scenario_1_github_three_stream_fetcher(self, mock_github_repo, mock_github_api_data): + def test_scenario_1_github_three_stream_fetcher( + self, mock_github_repo, mock_github_api_data + ): """Test GitHub three-stream fetcher with mock data.""" # Create fetcher with mock with ( - patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo), + patch.object( + GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo + ), patch.object( GitHubThreeStreamFetcher, "fetch_github_metadata", @@ -202,7 +223,9 @@ How to use async tools. return_value=mock_github_api_data["issues"], ), ): - fetcher = GitHubThreeStreamFetcher("https://github.com/jlowin/fastmcp", interactive=False) + fetcher = GitHubThreeStreamFetcher( + "https://github.com/jlowin/fastmcp", interactive=False + ) three_streams = fetcher.fetch() # Verify 3 streams exist @@ -228,10 +251,14 @@ How to use async tools. assert len(three_streams.insights_stream.known_solutions) >= 1 assert len(three_streams.insights_stream.top_labels) >= 2 - def test_scenario_1_unified_analyzer_github(self, mock_github_repo, mock_github_api_data): + def test_scenario_1_unified_analyzer_github( + self, mock_github_repo, mock_github_api_data + ): """Test unified analyzer with GitHub source.""" with ( - patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo), + patch.object( + GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo + ), patch.object( GitHubThreeStreamFetcher, "fetch_github_metadata", @@ -259,16 +286,24 @@ How to use async tools. {"name": "test_azure_provider", "file": "test_auth.py"}, ], "c3_2_examples_count": 2, - "c3_3_guides": [{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}], + "c3_3_guides": [ + {"title": "OAuth Setup Guide", "file": "docs/oauth.md"} + ], "c3_4_configs": [], "c3_7_architecture": [ - {"pattern": "Service Layer", "description": "OAuth provider abstraction"} + { + "pattern": "Service Layer", + "description": "OAuth provider abstraction", + } ], } analyzer = UnifiedCodebaseAnalyzer() result = analyzer.analyze( - source="https://github.com/jlowin/fastmcp", depth="c3x", fetch_github_metadata=True, interactive=False + source="https://github.com/jlowin/fastmcp", + depth="c3x", + fetch_github_metadata=True, + interactive=False, ) # Verify result structure @@ -300,7 +335,9 @@ How to use async tools. { "name": "fastmcp-oauth", "description": "OAuth authentication for FastMCP", - "categories": {"oauth": ["oauth", "auth", "provider", "google", "azure"]}, + "categories": { + "oauth": ["oauth", "auth", "provider", "google", "azure"] + }, } ) ) @@ -454,7 +491,9 @@ pip install fastmcp # Check content quality (Architecture Section 8.2) assert "Issue #42" in router_md, "Missing issue references" assert "⭐" in router_md or "Stars:" in router_md, "Missing GitHub metadata" - assert "Quick Start" in router_md or "README" in router_md, "Missing README content" + assert ( + "Quick Start" in router_md or "README" in router_md + ), "Missing README content" class TestScenario2MultiSource: @@ -495,14 +534,30 @@ class TestScenario2MultiSource: """Test categorizing GitHub issues by topic.""" problems = [ {"number": 42, "title": "OAuth setup fails", "labels": ["oauth", "bug"]}, - {"number": 38, "title": "Async tools not working", "labels": ["async", "question"]}, - {"number": 35, "title": "Testing with pytest", "labels": ["testing", "question"]}, - {"number": 30, "title": "Google OAuth redirect", "labels": ["oauth", "question"]}, + { + "number": 38, + "title": "Async tools not working", + "labels": ["async", "question"], + }, + { + "number": 35, + "title": "Testing with pytest", + "labels": ["testing", "question"], + }, + { + "number": 30, + "title": "Google OAuth redirect", + "labels": ["oauth", "question"], + }, ] solutions = [ {"number": 25, "title": "Fixed OAuth redirect", "labels": ["oauth", "bug"]}, - {"number": 20, "title": "Async timeout solution", "labels": ["async", "bug"]}, + { + "number": 20, + "title": "Async timeout solution", + "labels": ["async", "bug"], + }, ] topics = ["oauth", "async", "testing"] @@ -533,7 +588,12 @@ class TestScenario2MultiSource: def test_scenario_2_conflict_detection(self): """Test conflict detection between docs and code.""" # Mock API data from docs - api_data = {"GoogleProvider": {"params": ["app_id", "app_secret"], "source": "html_docs"}} + api_data = { + "GoogleProvider": { + "params": ["app_id", "app_secret"], + "source": "html_docs", + } + } # Mock GitHub docs github_docs = {"readme": "Use client_id and client_secret for Google OAuth"} @@ -557,23 +617,33 @@ class TestScenario2MultiSource: # Layer 4: GitHub insights (community knowledge) # Mock source 1 (HTML docs) - source1_data = {"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]} + source1_data = { + "api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}] + } # Mock source 2 (GitHub C3.x) source2_data = { - "api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}] + "api": [ + {"name": "GoogleProvider", "params": ["client_id", "client_secret"]} + ] } # Mock GitHub streams _github_streams = ThreeStreamData( code_stream=CodeStream(directory=Path("/tmp"), files=[]), docs_stream=DocsStream( - readme="Use client_id and client_secret", contributing=None, docs_files=[] + readme="Use client_id and client_secret", + contributing=None, + docs_files=[], ), insights_stream=InsightsStream( metadata={"stars": 1000}, common_problems=[ - {"number": 42, "title": "OAuth parameter confusion", "labels": ["oauth"]} + { + "number": 42, + "title": "OAuth parameter confusion", + "labels": ["oauth"], + } ], known_solutions=[], top_labels=[], @@ -581,7 +651,9 @@ class TestScenario2MultiSource: ) # Create merger with required arguments - merger = RuleBasedMerger(docs_data=source1_data, github_data=source2_data, conflicts=[]) + merger = RuleBasedMerger( + docs_data=source1_data, github_data=source2_data, conflicts=[] + ) # Merge using merge_all() method merged = merger.merge_all() @@ -625,7 +697,8 @@ class TestScenario3LocalCodebase: # Create source files src_dir = project_dir / "src" src_dir.mkdir() - (src_dir / "database.py").write_text(""" + (src_dir / "database.py").write_text( + """ class DatabaseConnection: '''Database connection pool''' def __init__(self, host, port): @@ -635,9 +708,11 @@ class DatabaseConnection: def connect(self): '''Establish connection''' pass -""") +""" + ) - (src_dir / "api.py").write_text(""" + (src_dir / "api.py").write_text( + """ from flask import Flask app = Flask(__name__) @@ -646,16 +721,19 @@ app = Flask(__name__) def get_users(): '''Get all users''' return {'users': []} -""") +""" + ) # Create tests tests_dir = project_dir / "tests" tests_dir.mkdir() - (tests_dir / "test_database.py").write_text(""" + (tests_dir / "test_database.py").write_text( + """ def test_connection(): conn = DatabaseConnection('localhost', 5432) assert conn.host == 'localhost' -""") +""" + ) return project_dir @@ -692,8 +770,12 @@ def test_connection(): mock_c3x.return_value = { "files": ["database.py", "api.py"], "analysis_type": "c3x", - "c3_1_patterns": [{"name": "Singleton", "count": 1, "file": "database.py"}], - "c3_2_examples": [{"name": "test_connection", "file": "test_database.py"}], + "c3_1_patterns": [ + {"name": "Singleton", "count": 1, "file": "database.py"} + ], + "c3_2_examples": [ + {"name": "test_connection", "file": "test_database.py"} + ], "c3_2_examples_count": 1, "c3_3_guides": [], "c3_4_configs": [], @@ -885,7 +967,9 @@ Based on analysis of GitHub issues: print(f"\nGitHub overhead: {github_overhead} lines") # Architecture target: 20-60 lines - assert 20 <= github_overhead <= 60, f"GitHub overhead {github_overhead} not in range 20-60" + assert ( + 20 <= github_overhead <= 60 + ), f"GitHub overhead {github_overhead} not in range 20-60" def test_router_size_within_limits(self): """Test router size is 150±20 lines (Architecture Section 8.1, Line 1970).""" @@ -893,7 +977,9 @@ Based on analysis of GitHub issues: router_lines = 150 # Simulated count # Architecture target: 150 lines (±20) - assert 130 <= router_lines <= 170, f"Router size {router_lines} not in range 130-170" + assert ( + 130 <= router_lines <= 170 + ), f"Router size {router_lines} not in range 130-170" def test_content_quality_requirements(self): """Test content quality (Architecture Section 8.2, Lines 1977-2014).""" @@ -935,9 +1021,9 @@ provider = GitHubProvider(client_id="...", client_secret="...") # Check minimum 3 code examples code_blocks = sub_skill_md.count("```") - assert code_blocks >= 6, ( - f"Need at least 3 code examples (6 markers), found {code_blocks // 2}" - ) + assert ( + code_blocks >= 6 + ), f"Need at least 3 code examples (6 markers), found {code_blocks // 2}" # Check language tags assert "```python" in sub_skill_md, "Code blocks must have language tags" @@ -952,9 +1038,9 @@ provider = GitHubProvider(client_id="...", client_secret="...") # Check solution indicators for closed issues if "closed" in sub_skill_md.lower(): - assert "āœ…" in sub_skill_md or "Solution" in sub_skill_md, ( - "Closed issues should indicate solution found" - ) + assert ( + "āœ…" in sub_skill_md or "Solution" in sub_skill_md + ), "Closed issues should indicate solution found" class TestTokenEfficiencyCalculation: @@ -991,9 +1077,9 @@ class TestTokenEfficiencyCalculation: # With selective loading and caching, achieve 35-40% # Even conservative estimate shows 29.5%, actual usage patterns show 35-40% - assert reduction_percent >= 29, ( - f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)" - ) + assert ( + reduction_percent >= 29 + ), f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)" if __name__ == "__main__": diff --git a/tests/test_async_scraping.py b/tests/test_async_scraping.py index 51ba0de..51f0e1f 100644 --- a/tests/test_async_scraping.py +++ b/tests/test_async_scraping.py @@ -103,7 +103,9 @@ class TestAsyncScrapeMethods(unittest.TestCase): os.chdir(tmpdir) converter = DocToSkillConverter(config, dry_run=True) self.assertTrue(hasattr(converter, "scrape_page_async")) - self.assertTrue(asyncio.iscoroutinefunction(converter.scrape_page_async)) + self.assertTrue( + asyncio.iscoroutinefunction(converter.scrape_page_async) + ) finally: os.chdir(self.original_cwd) @@ -177,9 +179,12 @@ class TestAsyncRouting(unittest.TestCase): converter = DocToSkillConverter(config, dry_run=True) # Mock scrape_all_async to verify it does NOT get called - with patch.object( - converter, "scrape_all_async", new_callable=AsyncMock - ) as mock_async, patch.object(converter, "_try_llms_txt", return_value=False): + with ( + patch.object( + converter, "scrape_all_async", new_callable=AsyncMock + ) as mock_async, + patch.object(converter, "_try_llms_txt", return_value=False), + ): converter.scrape_all() # Verify async version was NOT called mock_async.assert_not_called() @@ -258,7 +263,9 @@ class TestAsyncErrorHandling(unittest.TestCase): async with httpx.AsyncClient() as client: # Mock client.get to raise exception - with patch.object(client, "get", side_effect=httpx.HTTPError("Test error")): + with patch.object( + client, "get", side_effect=httpx.HTTPError("Test error") + ): # Should not raise exception, just log error await converter.scrape_page_async( "https://example.com/test", semaphore, client @@ -316,7 +323,10 @@ class TestAsyncLlmsTxtIntegration(unittest.TestCase): converter = DocToSkillConverter(config, dry_run=False) # Mock _try_llms_txt to return True (llms.txt found) - with patch.object(converter, "_try_llms_txt", return_value=True), patch.object(converter, "save_summary"): + with ( + patch.object(converter, "_try_llms_txt", return_value=True), + patch.object(converter, "save_summary"), + ): converter.scrape_all() # If llms.txt succeeded, async scraping should be skipped # Verify by checking that pages were not scraped diff --git a/tests/test_github_scraper.py b/tests/test_github_scraper.py index 935b35c..032a3f9 100644 --- a/tests/test_github_scraper.py +++ b/tests/test_github_scraper.py @@ -62,7 +62,11 @@ class TestGitHubScraperInitialization(unittest.TestCase): def test_init_with_token_from_config(self): """Test initialization with token from config""" - config = {"repo": "facebook/react", "name": "react", "github_token": "test_token_123"} + config = { + "repo": "facebook/react", + "name": "react", + "github_token": "test_token_123", + } with patch("skill_seekers.cli.github_scraper.Github") as mock_github: _scraper = self.GitHubScraper(config) @@ -72,7 +76,10 @@ class TestGitHubScraperInitialization(unittest.TestCase): """Test initialization with token from environment variable""" config = {"repo": "facebook/react", "name": "react", "github_token": None} - with patch.dict(os.environ, {"GITHUB_TOKEN": "env_token_456"}), patch("skill_seekers.cli.github_scraper.Github") as mock_github: + with ( + patch.dict(os.environ, {"GITHUB_TOKEN": "env_token_456"}), + patch("skill_seekers.cli.github_scraper.Github") as mock_github, + ): _scraper = self.GitHubScraper(config) mock_github.assert_called_once_with("env_token_456") @@ -80,14 +87,21 @@ class TestGitHubScraperInitialization(unittest.TestCase): """Test initialization without authentication""" config = {"repo": "facebook/react", "name": "react", "github_token": None} - with patch("skill_seekers.cli.github_scraper.Github"), patch.dict(os.environ, {}, clear=True): + with ( + patch("skill_seekers.cli.github_scraper.Github"), + patch.dict(os.environ, {}, clear=True), + ): scraper = self.GitHubScraper(config) # Should create unauthenticated client self.assertIsNotNone(scraper.github) def test_token_priority_env_over_config(self): """Test that GITHUB_TOKEN env var takes priority over config""" - config = {"repo": "facebook/react", "name": "react", "github_token": "config_token"} + config = { + "repo": "facebook/react", + "name": "react", + "github_token": "config_token", + } with patch.dict(os.environ, {"GITHUB_TOKEN": "env_token"}): scraper = self.GitHubScraper(config) @@ -120,7 +134,9 @@ class TestREADMEExtraction(unittest.TestCase): scraper._extract_readme() self.assertIn("readme", scraper.extracted_data) - self.assertEqual(scraper.extracted_data["readme"], "# React\n\nA JavaScript library") + self.assertEqual( + scraper.extracted_data["readme"], "# React\n\nA JavaScript library" + ) def test_extract_readme_tries_multiple_locations(self): """Test that README extraction tries multiple file locations""" @@ -177,7 +193,10 @@ class TestLanguageDetection(unittest.TestCase): with patch("skill_seekers.cli.github_scraper.Github"): scraper = self.GitHubScraper(config) scraper.repo = Mock() - scraper.repo.get_languages.return_value = {"JavaScript": 8000, "TypeScript": 2000} + scraper.repo.get_languages.return_value = { + "JavaScript": 8000, + "TypeScript": 2000, + } scraper._extract_languages() @@ -221,7 +240,12 @@ class TestIssuesExtraction(unittest.TestCase): def test_extract_issues_success(self): """Test successful issues extraction""" - config = {"repo": "facebook/react", "name": "react", "github_token": None, "max_issues": 10} + config = { + "repo": "facebook/react", + "name": "react", + "github_token": None, + "max_issues": 10, + } # Create mock issues mock_label1 = Mock() @@ -286,7 +310,12 @@ class TestIssuesExtraction(unittest.TestCase): def test_extract_issues_filters_pull_requests(self): """Test that pull requests are filtered out from issues""" - config = {"repo": "facebook/react", "name": "react", "github_token": None, "max_issues": 10} + config = { + "repo": "facebook/react", + "name": "react", + "github_token": None, + "max_issues": 10, + } # Create mock issue (need all required attributes) mock_issue = Mock() @@ -321,7 +350,12 @@ class TestIssuesExtraction(unittest.TestCase): def test_extract_issues_respects_max_limit(self): """Test that max_issues limit is respected""" - config = {"repo": "facebook/react", "name": "react", "github_token": None, "max_issues": 2} + config = { + "repo": "facebook/react", + "name": "react", + "github_token": None, + "max_issues": 2, + } # Create 5 mock issues mock_issues = [] @@ -443,9 +477,15 @@ class TestReleasesExtraction(unittest.TestCase): mock_release1.prerelease = False mock_release1.created_at = datetime(2023, 3, 1) mock_release1.published_at = datetime(2023, 3, 1) - mock_release1.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0" - mock_release1.tarball_url = "https://github.com/facebook/react/archive/v18.0.0.tar.gz" - mock_release1.zipball_url = "https://github.com/facebook/react/archive/v18.0.0.zip" + mock_release1.html_url = ( + "https://github.com/facebook/react/releases/tag/v18.0.0" + ) + mock_release1.tarball_url = ( + "https://github.com/facebook/react/archive/v18.0.0.tar.gz" + ) + mock_release1.zipball_url = ( + "https://github.com/facebook/react/archive/v18.0.0.zip" + ) mock_release2 = Mock() mock_release2.tag_name = "v18.0.0-rc.0" @@ -455,9 +495,15 @@ class TestReleasesExtraction(unittest.TestCase): mock_release2.prerelease = True mock_release2.created_at = datetime(2023, 2, 1) mock_release2.published_at = datetime(2023, 2, 1) - mock_release2.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0-rc.0" - mock_release2.tarball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz" - mock_release2.zipball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.zip" + mock_release2.html_url = ( + "https://github.com/facebook/react/releases/tag/v18.0.0-rc.0" + ) + mock_release2.tarball_url = ( + "https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz" + ) + mock_release2.zipball_url = ( + "https://github.com/facebook/react/archive/v18.0.0-rc.0.zip" + ) with patch("skill_seekers.cli.github_scraper.Github"): scraper = self.GitHubScraper(config) @@ -566,7 +612,9 @@ class TestGitHubToSkillConverter(unittest.TestCase): config = {"repo": "facebook/react", "name": "test", "description": "Test skill"} # Override data file path - with patch("skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__") as mock_init: + with patch( + "skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__" + ) as mock_init: mock_init.return_value = None converter = self.GitHubToSkillConverter(config) converter.data_file = str(self.data_file) @@ -733,7 +781,8 @@ class TestSymlinkHandling(unittest.TestCase): # Should successfully extract README content self.assertIn("readme", scraper.extracted_data) self.assertEqual( - scraper.extracted_data["readme"], "# AI SDK\n\nThe AI SDK is a TypeScript toolkit" + scraper.extracted_data["readme"], + "# AI SDK\n\nThe AI SDK is a TypeScript toolkit", ) def test_extract_changelog_with_symlink(self): @@ -815,7 +864,8 @@ class TestSymlinkHandling(unittest.TestCase): # Should download via download_url self.assertEqual(result, "# Changelog\n\n## v1.0.0\n- Initial release") mock_requests.assert_called_once_with( - "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md", timeout=30 + "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md", + timeout=30, ) def test_extract_changelog_large_file(self): @@ -950,7 +1000,9 @@ class TestErrorHandling(unittest.TestCase): with patch("skill_seekers.cli.github_scraper.Github"): scraper = self.GitHubScraper(config) scraper.repo = None - scraper.github.get_repo = Mock(side_effect=GithubException(404, "Not found")) + scraper.github.get_repo = Mock( + side_effect=GithubException(404, "Not found") + ) # Should raise ValueError with helpful message with self.assertRaises(ValueError) as context: @@ -960,12 +1012,19 @@ class TestErrorHandling(unittest.TestCase): def test_rate_limit_error(self): """Test handling of rate limit errors""" - config = {"repo": "facebook/react", "name": "react", "github_token": None, "max_issues": 10} + config = { + "repo": "facebook/react", + "name": "react", + "github_token": None, + "max_issues": 10, + } with patch("skill_seekers.cli.github_scraper.Github"): scraper = self.GitHubScraper(config) scraper.repo = Mock() - scraper.repo.get_issues.side_effect = GithubException(403, "Rate limit exceeded") + scraper.repo.get_issues.side_effect = GithubException( + 403, "Rate limit exceeded" + ) # Should handle gracefully and log warning scraper._extract_issues() diff --git a/tests/test_guide_enhancer.py b/tests/test_guide_enhancer.py index e4546e6..07c78e4 100644 --- a/tests/test_guide_enhancer.py +++ b/tests/test_guide_enhancer.py @@ -28,9 +28,13 @@ class TestGuideEnhancerModeDetection: def test_auto_mode_with_api_key(self): """Test auto mode detects API when key present and library available""" - with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic: + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), + patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), + patch( + "skill_seekers.cli.guide_enhancer.anthropic", create=True + ) as mock_anthropic, + ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="auto") # Will be 'api' if library available, otherwise 'local' or 'none' @@ -80,7 +84,12 @@ class TestGuideEnhancerStepDescriptions: def test_enhance_step_descriptions_none_mode(self): """Test step descriptions in none mode returns empty""" enhancer = GuideEnhancer(mode="none") - steps = [{"description": "scraper.scrape(url)", "code": "result = scraper.scrape(url)"}] + steps = [ + { + "description": "scraper.scrape(url)", + "code": "result = scraper.scrape(url)", + } + ] result = enhancer.enhance_step_descriptions(steps) assert result == [] @@ -99,9 +108,13 @@ class TestGuideEnhancerStepDescriptions: } ) - with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic: + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), + patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), + patch( + "skill_seekers.cli.guide_enhancer.anthropic", create=True + ) as mock_anthropic, + ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") if enhancer.mode != "api": @@ -163,9 +176,13 @@ class TestGuideEnhancerTroubleshooting: } ) - with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic: + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), + patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), + patch( + "skill_seekers.cli.guide_enhancer.anthropic", create=True + ) as mock_anthropic, + ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") if enhancer.mode != "api": @@ -175,7 +192,9 @@ class TestGuideEnhancerTroubleshooting: guide_data = { "title": "Test Guide", - "steps": [{"description": "import requests", "code": "import requests"}], + "steps": [ + {"description": "import requests", "code": "import requests"} + ], "language": "python", } result = enhancer.enhance_troubleshooting(guide_data) @@ -224,9 +243,13 @@ class TestGuideEnhancerPrerequisites: } ) - with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic: + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), + patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), + patch( + "skill_seekers.cli.guide_enhancer.anthropic", create=True + ) as mock_anthropic, + ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") if enhancer.mode != "api": @@ -267,9 +290,13 @@ class TestGuideEnhancerNextSteps: } ) - with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic: + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), + patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), + patch( + "skill_seekers.cli.guide_enhancer.anthropic", create=True + ) as mock_anthropic, + ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") if enhancer.mode != "api": @@ -277,7 +304,10 @@ class TestGuideEnhancerNextSteps: enhancer.client = Mock() - guide_data = {"title": "How to Scrape Docs", "description": "Basic scraping"} + guide_data = { + "title": "How to Scrape Docs", + "description": "Basic scraping", + } result = enhancer.enhance_next_steps(guide_data) assert len(result) == 3 @@ -307,9 +337,13 @@ class TestGuideEnhancerUseCases: } ) - with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic: + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), + patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), + patch( + "skill_seekers.cli.guide_enhancer.anthropic", create=True + ) as mock_anthropic, + ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") if enhancer.mode != "api": @@ -358,7 +392,11 @@ class TestGuideEnhancerFullWorkflow: mock_call.return_value = json.dumps( { "step_descriptions": [ - {"step_index": 0, "explanation": "Import required libraries", "variations": []}, + { + "step_index": 0, + "explanation": "Import required libraries", + "variations": [], + }, { "step_index": 1, "explanation": "Initialize scraper instance", @@ -374,16 +412,24 @@ class TestGuideEnhancerFullWorkflow: } ], "prerequisites_detailed": [ - {"name": "requests", "why": "HTTP client", "setup": "pip install requests"} + { + "name": "requests", + "why": "HTTP client", + "setup": "pip install requests", + } ], "next_steps": ["How to add authentication"], "use_cases": ["Automate documentation extraction"], } ) - with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic: + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), + patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), + patch( + "skill_seekers.cli.guide_enhancer.anthropic", create=True + ) as mock_anthropic, + ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") if enhancer.mode != "api": @@ -415,7 +461,9 @@ class TestGuideEnhancerFullWorkflow: """Test graceful fallback on enhancement error""" enhancer = GuideEnhancer(mode="none") - with patch.object(enhancer, "enhance_guide", side_effect=Exception("API error")): + with patch.object( + enhancer, "enhance_guide", side_effect=Exception("API error") + ): guide_data = { "title": "Test", "steps": [], @@ -485,7 +533,9 @@ class TestGuideEnhancerPromptGeneration: guide_data = { "title": "How to Test", - "steps": [{"description": "Write test", "code": "def test_example(): pass"}], + "steps": [ + {"description": "Write test", "code": "def test_example(): pass"} + ], "language": "python", "prerequisites": ["pytest"], } @@ -533,7 +583,9 @@ class TestGuideEnhancerResponseParsing: response = json.dumps( { - "step_descriptions": [{"step_index": 0, "explanation": "Test", "variations": []}], + "step_descriptions": [ + {"step_index": 0, "explanation": "Test", "variations": []} + ], "troubleshooting": [], "prerequisites_detailed": [], "next_steps": [], diff --git a/tests/test_install_agent.py b/tests/test_install_agent.py index c60022d..6d5e00b 100644 --- a/tests/test_install_agent.py +++ b/tests/test_install_agent.py @@ -174,7 +174,9 @@ class TestInstallToAgent: self.skill_dir.mkdir() # Create SKILL.md - (self.skill_dir / "SKILL.md").write_text("# Test Skill\n\nThis is a test skill.") + (self.skill_dir / "SKILL.md").write_text( + "# Test Skill\n\nThis is a test skill." + ) # Create references directory with files refs_dir = self.skill_dir / "references" @@ -195,8 +197,13 @@ class TestInstallToAgent: with tempfile.TemporaryDirectory() as agent_tmpdir: agent_path = Path(agent_tmpdir) / ".claude" / "skills" - with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path): - success, message = install_to_agent(self.skill_dir, "claude", force=True) + with patch( + "skill_seekers.cli.install_agent.get_agent_path", + return_value=agent_path, + ): + success, message = install_to_agent( + self.skill_dir, "claude", force=True + ) assert success is True target_path = agent_path / "test-skill" @@ -208,8 +215,13 @@ class TestInstallToAgent: with tempfile.TemporaryDirectory() as agent_tmpdir: agent_path = Path(agent_tmpdir) / ".claude" / "skills" - with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path): - success, message = install_to_agent(self.skill_dir, "claude", force=True) + with patch( + "skill_seekers.cli.install_agent.get_agent_path", + return_value=agent_path, + ): + success, message = install_to_agent( + self.skill_dir, "claude", force=True + ) assert success is True target_path = agent_path / "test-skill" @@ -230,8 +242,13 @@ class TestInstallToAgent: with tempfile.TemporaryDirectory() as agent_tmpdir: agent_path = Path(agent_tmpdir) / ".claude" / "skills" - with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path): - success, message = install_to_agent(self.skill_dir, "claude", force=True) + with patch( + "skill_seekers.cli.install_agent.get_agent_path", + return_value=agent_path, + ): + success, message = install_to_agent( + self.skill_dir, "claude", force=True + ) assert success is True target_path = agent_path / "test-skill" @@ -248,8 +265,13 @@ class TestInstallToAgent: target_path = agent_path / "test-skill" target_path.mkdir(parents=True) - with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path): - success, message = install_to_agent(self.skill_dir, "claude", force=False) + with patch( + "skill_seekers.cli.install_agent.get_agent_path", + return_value=agent_path, + ): + success, message = install_to_agent( + self.skill_dir, "claude", force=False + ) assert success is False assert "already installed" in message.lower() @@ -263,8 +285,13 @@ class TestInstallToAgent: target_path.mkdir(parents=True) (target_path / "old_file.txt").write_text("old content") - with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path): - success, message = install_to_agent(self.skill_dir, "claude", force=True) + with patch( + "skill_seekers.cli.install_agent.get_agent_path", + return_value=agent_path, + ): + success, message = install_to_agent( + self.skill_dir, "claude", force=True + ) assert success is True # Old file should be gone @@ -297,8 +324,13 @@ class TestInstallToAgent: with tempfile.TemporaryDirectory() as agent_tmpdir: agent_path = Path(agent_tmpdir) / ".claude" / "skills" - with patch("skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path): - success, message = install_to_agent(self.skill_dir, "claude", dry_run=True) + with patch( + "skill_seekers.cli.install_agent.get_agent_path", + return_value=agent_path, + ): + success, message = install_to_agent( + self.skill_dir, "claude", dry_run=True + ) assert success is True assert "DRY RUN" in message @@ -329,7 +361,8 @@ class TestInstallToAllAgents: return Path(agent_tmpdir) / f".{agent_name}" / "skills" with patch( - "skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path + "skill_seekers.cli.install_agent.get_agent_path", + side_effect=mock_get_agent_path, ): results = install_to_all_agents(self.skill_dir, force=True) @@ -360,7 +393,8 @@ class TestInstallToAllAgents: return Path(agent_tmpdir) / f".{agent_name}" / "skills" with patch( - "skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path + "skill_seekers.cli.install_agent.get_agent_path", + side_effect=mock_get_agent_path, ): # Without force - should fail results_no_force = install_to_all_agents(self.skill_dir, force=False) @@ -415,7 +449,10 @@ class TestInstallAgentCLI: def test_cli_requires_agent_flag(self): """Test that CLI fails without --agent flag.""" - with pytest.raises(SystemExit) as exc_info, patch("sys.argv", ["install_agent.py", str(self.skill_dir)]): + with ( + pytest.raises(SystemExit) as exc_info, + patch("sys.argv", ["install_agent.py", str(self.skill_dir)]), + ): main() # Missing required argument exits with code 2 @@ -428,17 +465,29 @@ class TestInstallAgentCLI: def mock_get_agent_path(agent_name, _project_root=None): return Path(agent_tmpdir) / f".{agent_name}" / "skills" - with patch( - "skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path - ), patch( - "sys.argv", - ["install_agent.py", str(self.skill_dir), "--agent", "claude", "--dry-run"], + with ( + patch( + "skill_seekers.cli.install_agent.get_agent_path", + side_effect=mock_get_agent_path, + ), + patch( + "sys.argv", + [ + "install_agent.py", + str(self.skill_dir), + "--agent", + "claude", + "--dry-run", + ], + ), ): exit_code = main() assert exit_code == 0 # Directory should NOT be created - assert not (Path(agent_tmpdir) / ".claude" / "skills" / "test-skill").exists() + assert not ( + Path(agent_tmpdir) / ".claude" / "skills" / "test-skill" + ).exists() def test_cli_integration(self): """Test end-to-end CLI execution.""" @@ -447,11 +496,21 @@ class TestInstallAgentCLI: def mock_get_agent_path(agent_name, _project_root=None): return Path(agent_tmpdir) / f".{agent_name}" / "skills" - with patch( - "skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path - ), patch( - "sys.argv", - ["install_agent.py", str(self.skill_dir), "--agent", "claude", "--force"], + with ( + patch( + "skill_seekers.cli.install_agent.get_agent_path", + side_effect=mock_get_agent_path, + ), + patch( + "sys.argv", + [ + "install_agent.py", + str(self.skill_dir), + "--agent", + "claude", + "--force", + ], + ), ): exit_code = main() @@ -468,11 +527,21 @@ class TestInstallAgentCLI: def mock_get_agent_path(agent_name, _project_root=None): return Path(agent_tmpdir) / f".{agent_name}" / "skills" - with patch( - "skill_seekers.cli.install_agent.get_agent_path", side_effect=mock_get_agent_path - ), patch( - "sys.argv", - ["install_agent.py", str(self.skill_dir), "--agent", "all", "--force"], + with ( + patch( + "skill_seekers.cli.install_agent.get_agent_path", + side_effect=mock_get_agent_path, + ), + patch( + "sys.argv", + [ + "install_agent.py", + str(self.skill_dir), + "--agent", + "all", + "--force", + ], + ), ): exit_code = main() diff --git a/tests/test_issue_219_e2e.py b/tests/test_issue_219_e2e.py index b1aa978..07852b5 100644 --- a/tests/test_issue_219_e2e.py +++ b/tests/test_issue_219_e2e.py @@ -50,7 +50,9 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase): # Mock large CHANGELOG (1.4MB, encoding="none") mock_content = Mock() mock_content.type = "file" - mock_content.encoding = "none" # This is what GitHub API returns for large files + mock_content.encoding = ( + "none" # This is what GitHub API returns for large files + ) mock_content.size = 1388271 mock_content.download_url = ( "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md" @@ -73,13 +75,16 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase): # VERIFY: download_url was called mock_requests.assert_called_once_with( - "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md", timeout=30 + "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md", + timeout=30, ) # VERIFY: CHANGELOG was extracted successfully self.assertIn("changelog", scraper.extracted_data) self.assertIn("Bug fixes", scraper.extracted_data["changelog"]) - self.assertEqual(scraper.extracted_data["changelog"], mock_response.text) + self.assertEqual( + scraper.extracted_data["changelog"], mock_response.text + ) def test_large_file_fallback_on_error(self): """E2E: Verify graceful handling if download_url fails""" @@ -179,7 +184,8 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase): # VERIFY: sys.argv contains --enhance-local flag # (main.py should have added it before calling github_scraper) called_with_enhance = any( - "--enhance-local" in str(call) for call in mock_github_main.call_args_list + "--enhance-local" in str(call) + for call in mock_github_main.call_args_list ) self.assertTrue( called_with_enhance or "--enhance-local" in sys.argv, @@ -220,9 +226,12 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): with ( patch.dict( - os.environ, {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url} + os.environ, + {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url}, ), - patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic, + patch( + "skill_seekers.cli.enhance_skill.anthropic.Anthropic" + ) as mock_anthropic, ): # Create enhancer _enhancer = SkillEnhancer(self.skill_dir) @@ -249,7 +258,9 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): # Use ANTHROPIC_AUTH_TOKEN instead of ANTHROPIC_API_KEY with ( patch.dict(os.environ, {"ANTHROPIC_AUTH_TOKEN": custom_token}, clear=True), - patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic, + patch( + "skill_seekers.cli.enhance_skill.anthropic.Anthropic" + ) as mock_anthropic, ): # Create enhancer (should accept ANTHROPIC_AUTH_TOKEN) enhancer = SkillEnhancer(self.skill_dir) @@ -265,7 +276,9 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): mock_anthropic.assert_called_once() call_kwargs = mock_anthropic.call_args[1] self.assertEqual( - call_kwargs["api_key"], custom_token, "api_key should match ANTHROPIC_AUTH_TOKEN" + call_kwargs["api_key"], + custom_token, + "api_key should match ANTHROPIC_AUTH_TOKEN", ) def test_thinking_block_handling(self): @@ -275,7 +288,12 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): except ImportError: self.skipTest("anthropic package not installed") - with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}), patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic: + with ( + patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}), + patch( + "skill_seekers.cli.enhance_skill.anthropic.Anthropic" + ) as mock_anthropic, + ): enhancer = SkillEnhancer(self.skill_dir) # Mock response with ThinkingBlock (newer SDK) @@ -283,7 +301,9 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): mock_thinking_block = SimpleNamespace(type="thinking") # TextBlock has .text attribute - mock_text_block = SimpleNamespace(text="# Enhanced SKILL.md\n\nContent here") + mock_text_block = SimpleNamespace( + text="# Enhanced SKILL.md\n\nContent here" + ) mock_message = Mock() mock_message.content = [mock_thinking_block, mock_text_block] diff --git a/tests/test_llms_txt_downloader.py b/tests/test_llms_txt_downloader.py index 06b1768..a6a8042 100644 --- a/tests/test_llms_txt_downloader.py +++ b/tests/test_llms_txt_downloader.py @@ -30,7 +30,12 @@ def test_timeout_with_retry(): """Test timeout scenario with retry logic""" downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2) - with patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get, patch("time.sleep") as mock_sleep: # Mock sleep to speed up test + with ( + patch( + "requests.get", side_effect=requests.Timeout("Connection timeout") + ) as mock_get, + patch("time.sleep") as mock_sleep, + ): # Mock sleep to speed up test content = downloader.download() assert content is None @@ -73,7 +78,10 @@ def test_http_error_handling(): mock_response = Mock() mock_response.raise_for_status.side_effect = requests.HTTPError("404 Not Found") - with patch("requests.get", return_value=mock_response) as mock_get, patch("time.sleep"): + with ( + patch("requests.get", return_value=mock_response) as mock_get, + patch("time.sleep"), + ): content = downloader.download() assert content is None @@ -84,7 +92,10 @@ def test_exponential_backoff(): """Test that exponential backoff delays are correct""" downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=3) - with patch("requests.get", side_effect=requests.Timeout("Connection timeout")), patch("time.sleep") as mock_sleep: + with ( + patch("requests.get", side_effect=requests.Timeout("Connection timeout")), + patch("time.sleep") as mock_sleep, + ): content = downloader.download() assert content is None @@ -132,7 +143,9 @@ def test_custom_max_retries(): downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5) with ( - patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get, + patch( + "requests.get", side_effect=requests.Timeout("Connection timeout") + ) as mock_get, patch("time.sleep"), ): content = downloader.download() @@ -190,9 +203,7 @@ def test_is_markdown_rejects_html_doctype(): """Test that HTML with DOCTYPE is rejected (prevents redirect trap)""" downloader = LlmsTxtDownloader("https://example.com/llms.txt") - html = ( - "Product PageContent" - ) + html = "Product PageContent" assert not downloader._is_markdown(html) # Test case-insensitive @@ -219,7 +230,9 @@ def test_is_markdown_rejects_html_meta(): html_with_head = "PageContent" assert not downloader._is_markdown(html_with_head) - html_with_meta = '' + html_with_meta = ( + '' + ) assert not downloader._is_markdown(html_with_meta) @@ -231,7 +244,9 @@ def test_is_markdown_accepts_markdown_with_html_words(): assert downloader._is_markdown(markdown) # Test with actual markdown patterns - markdown_with_code = "# HTML Tutorial\n\n```html\n
example
\n```\n\n## More content" + markdown_with_code = ( + "# HTML Tutorial\n\n```html\n
example
\n```\n\n## More content" + ) assert downloader._is_markdown(markdown_with_code) @@ -240,7 +255,9 @@ def test_html_detection_only_scans_first_500_chars(): downloader = LlmsTxtDownloader("https://example.com/llms.txt") # HTML tag after 500 chars should not be detected - safe_markdown = "# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n" + safe_markdown = ( + "# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n" + ) # This should pass because is beyond first 500 chars if len(safe_markdown[:500]) < len(""): # If the HTML is within 500 chars, adjust test @@ -277,7 +294,9 @@ def test_download_rejects_html_redirect(): mock_response = Mock() # Simulate server returning HTML instead of markdown - mock_response.text = "

Product Page

" + mock_response.text = ( + "

Product Page

" + ) mock_response.raise_for_status = Mock() with patch("requests.get", return_value=mock_response): diff --git a/tests/test_skip_llms_txt.py b/tests/test_skip_llms_txt.py index 8444766..4ba23b0 100644 --- a/tests/test_skip_llms_txt.py +++ b/tests/test_skip_llms_txt.py @@ -72,7 +72,13 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase): os.chdir(tmpdir) converter = DocToSkillConverter(config, dry_run=False) - with patch.object(converter, "_try_llms_txt", return_value=False) as mock_try, patch.object(converter, "scrape_page"), patch.object(converter, "save_summary"): + with ( + patch.object( + converter, "_try_llms_txt", return_value=False + ) as mock_try, + patch.object(converter, "scrape_page"), + patch.object(converter, "save_summary"), + ): converter.scrape_all() mock_try.assert_called_once() finally: @@ -93,7 +99,11 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase): os.chdir(tmpdir) converter = DocToSkillConverter(config, dry_run=False) - with patch.object(converter, "_try_llms_txt") as mock_try, patch.object(converter, "scrape_page"), patch.object(converter, "save_summary"): + with ( + patch.object(converter, "_try_llms_txt") as mock_try, + patch.object(converter, "scrape_page"), + patch.object(converter, "save_summary"), + ): converter.scrape_all() mock_try.assert_not_called() finally: @@ -114,7 +124,10 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase): os.chdir(tmpdir) converter = DocToSkillConverter(config, dry_run=True) - with patch.object(converter, "_try_llms_txt") as mock_try, patch.object(converter, "save_summary"): + with ( + patch.object(converter, "_try_llms_txt") as mock_try, + patch.object(converter, "save_summary"), + ): converter.scrape_all() mock_try.assert_not_called() finally: @@ -140,7 +153,13 @@ class TestSkipLlmsTxtAsyncBehavior(unittest.TestCase): os.chdir(tmpdir) converter = DocToSkillConverter(config, dry_run=False) - with patch.object(converter, "_try_llms_txt", return_value=False) as mock_try, patch.object(converter, "scrape_page_async", return_value=None), patch.object(converter, "save_summary"): + with ( + patch.object( + converter, "_try_llms_txt", return_value=False + ) as mock_try, + patch.object(converter, "scrape_page_async", return_value=None), + patch.object(converter, "save_summary"), + ): converter.scrape_all() mock_try.assert_called_once() finally: @@ -162,7 +181,11 @@ class TestSkipLlmsTxtAsyncBehavior(unittest.TestCase): os.chdir(tmpdir) converter = DocToSkillConverter(config, dry_run=False) - with patch.object(converter, "_try_llms_txt") as mock_try, patch.object(converter, "scrape_page_async", return_value=None), patch.object(converter, "save_summary"): + with ( + patch.object(converter, "_try_llms_txt") as mock_try, + patch.object(converter, "scrape_page_async", return_value=None), + patch.object(converter, "save_summary"), + ): converter.scrape_all() mock_try.assert_not_called() finally: @@ -179,7 +202,10 @@ class TestSkipLlmsTxtWithRealConfig(unittest.TestCase): "description": "Telegram bot documentation", "base_url": "https://core.telegram.org/bots", "skip_llms_txt": True, # Telegram doesn't have useful llms.txt - "start_urls": ["https://core.telegram.org/bots", "https://core.telegram.org/bots/api"], + "start_urls": [ + "https://core.telegram.org/bots", + "https://core.telegram.org/bots/api", + ], "selectors": { "main_content": "#dev_page_content, main, article", "title": "h1, title", @@ -226,7 +252,9 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: converter = DocToSkillConverter(config, dry_run=True) self.assertFalse(converter.skip_llms_txt) - self.assertTrue(any("Invalid value" in log and "0" in log for log in cm.output)) + self.assertTrue( + any("Invalid value" in log and "0" in log for log in cm.output) + ) def test_skip_llms_txt_with_int_one_logs_warning(self): """Test that integer 1 logs warning and defaults to False.""" @@ -240,7 +268,9 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: converter = DocToSkillConverter(config, dry_run=True) self.assertFalse(converter.skip_llms_txt) - self.assertTrue(any("Invalid value" in log and "1" in log for log in cm.output)) + self.assertTrue( + any("Invalid value" in log and "1" in log for log in cm.output) + ) def test_skip_llms_txt_with_string_logs_warning(self): """Test that string values log warning and default to False.""" @@ -254,7 +284,9 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: converter = DocToSkillConverter(config, dry_run=True) self.assertFalse(converter.skip_llms_txt) - self.assertTrue(any("Invalid value" in log and "true" in log for log in cm.output)) + self.assertTrue( + any("Invalid value" in log and "true" in log for log in cm.output) + ) def test_skip_llms_txt_with_none_logs_warning(self): """Test that None logs warning and defaults to False.""" @@ -268,7 +300,9 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: converter = DocToSkillConverter(config, dry_run=True) self.assertFalse(converter.skip_llms_txt) - self.assertTrue(any("Invalid value" in log and "None" in log for log in cm.output)) + self.assertTrue( + any("Invalid value" in log and "None" in log for log in cm.output) + ) def test_scraping_proceeds_when_llms_txt_skipped(self): """Test that HTML scraping proceeds normally when llms.txt is skipped.""" @@ -292,7 +326,10 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): scrape_called.append(url) return None - with patch.object(converter, "scrape_page", side_effect=mock_scrape), patch.object(converter, "save_summary"): + with ( + patch.object(converter, "scrape_page", side_effect=mock_scrape), + patch.object(converter, "save_summary"), + ): converter.scrape_all() # Should have attempted to scrape the base URL self.assertTrue(len(scrape_called) > 0)