From 85c8d9d385edeab23f91aeb89f281a3777d37115 Mon Sep 17 00:00:00 2001 From: yusyus Date: Sun, 18 Jan 2026 00:01:30 +0300 Subject: [PATCH] style: Run ruff format on 15 files (CI fix) CI uses 'ruff format' not 'black' - applied proper formatting: Files reformatted by ruff: - config_extractor.py - doc_scraper.py - how_to_guide_builder.py - llms_txt_parser.py - pattern_recognizer.py - test_example_extractor.py - unified_codebase_analyzer.py - test_architecture_scenarios.py - test_async_scraping.py - test_github_scraper.py - test_guide_enhancer.py - test_install_agent.py - test_issue_219_e2e.py - test_llms_txt_downloader.py - test_skip_llms_txt.py Fixes CI formatting check failure. Co-Authored-By: Claude Sonnet 4.5 --- src/skill_seekers/cli/config_extractor.py | 35 +--- src/skill_seekers/cli/doc_scraper.py | 165 +++++------------- src/skill_seekers/cli/how_to_guide_builder.py | 53 ++---- src/skill_seekers/cli/llms_txt_parser.py | 8 +- src/skill_seekers/cli/pattern_recognizer.py | 62 ++----- .../cli/test_example_extractor.py | 61 ++----- .../cli/unified_codebase_analyzer.py | 28 +-- tests/test_architecture_scenarios.py | 74 +++----- tests/test_async_scraping.py | 8 +- tests/test_github_scraper.py | 40 ++--- tests/test_guide_enhancer.py | 44 ++--- tests/test_install_agent.py | 32 +--- tests/test_issue_219_e2e.py | 27 +-- tests/test_llms_txt_downloader.py | 28 +-- tests/test_skip_llms_txt.py | 24 +-- 15 files changed, 179 insertions(+), 510 deletions(-) diff --git a/src/skill_seekers/cli/config_extractor.py b/src/skill_seekers/cli/config_extractor.py index 133ee58..688dde2 100644 --- a/src/skill_seekers/cli/config_extractor.py +++ b/src/skill_seekers/cli/config_extractor.py @@ -89,9 +89,7 @@ class ConfigExtractionResult: config_files: list[ConfigFile] = field(default_factory=list) total_files: int = 0 total_settings: int = 0 - detected_patterns: dict[str, list[str]] = field( - default_factory=dict - ) # pattern -> files + detected_patterns: dict[str, list[str]] = field(default_factory=dict) # pattern -> files errors: list[str] = field(default_factory=list) def to_dict(self) -> dict: @@ -241,9 +239,7 @@ class ConfigFileDetector: "*.egg-info", } - def find_config_files( - self, directory: Path, max_files: int = 100 - ) -> list[ConfigFile]: + def find_config_files(self, directory: Path, max_files: int = 100) -> list[ConfigFile]: """ Find all configuration files in directory. @@ -314,10 +310,7 @@ class ConfigFileDetector: filename = file_path.name.lower() # Database configs - if any( - word in path_lower - for word in ["database", "db", "postgres", "mysql", "mongo"] - ): + if any(word in path_lower for word in ["database", "db", "postgres", "mysql", "mongo"]): return "database_configuration" # API configs @@ -333,9 +326,7 @@ class ConfigFileDetector: return "docker_configuration" # CI/CD configs - if any( - word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"] - ): + if any(word in path_lower for word in [".travis", ".gitlab", ".github", "ci", "cd"]): return "ci_cd_configuration" # Package configs @@ -347,11 +338,7 @@ class ConfigFileDetector: return "typescript_configuration" # Framework configs - if ( - "next.config" in filename - or "vue.config" in filename - or "webpack.config" in filename - ): + if "next.config" in filename or "vue.config" in filename or "webpack.config" in filename: return "framework_configuration" # Environment configs @@ -531,9 +518,7 @@ class ConfigParser: for match in re.finditer(pattern, config_file.raw_content): if len(match.groups()) >= 2: key = match.group(1) - value = ( - match.group(3) if len(match.groups()) > 2 else match.group(2) - ) + value = match.group(3) if len(match.groups()) > 2 else match.group(2) setting = ConfigSetting( key=key, value=value, value_type=self._infer_type(value) @@ -579,9 +564,7 @@ class ConfigParser: for key, value in data.items(): if isinstance(value, dict): # Recurse into nested dicts - self._extract_settings_from_dict( - value, config_file, parent_path + [key] - ) + self._extract_settings_from_dict(value, config_file, parent_path + [key]) else: setting = ConfigSetting( key=".".join(parent_path + [key]) if parent_path else key, @@ -872,9 +855,7 @@ def main(): print("\nšŸ“Š Summary:") print(f" Config files found: {result.total_files}") print(f" Total settings: {result.total_settings}") - print( - f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}" - ) + print(f" Detected patterns: {', '.join(result.detected_patterns.keys()) or 'None'}") if "ai_enhancements" in output_dict: print(f" ✨ AI enhancements: Yes ({enhance_mode} mode)") diff --git a/src/skill_seekers/cli/doc_scraper.py b/src/skill_seekers/cli/doc_scraper.py index 2ef1d6c..e0cc036 100755 --- a/src/skill_seekers/cli/doc_scraper.py +++ b/src/skill_seekers/cli/doc_scraper.py @@ -148,9 +148,7 @@ def infer_description_from_docs( class DocToSkillConverter: - def __init__( - self, config: dict[str, Any], dry_run: bool = False, resume: bool = False - ) -> None: + def __init__(self, config: dict[str, Any], dry_run: bool = False, resume: bool = False) -> None: self.config = config self.name = config["name"] self.base_url = config["base_url"] @@ -165,9 +163,7 @@ class DocToSkillConverter: # Checkpoint config checkpoint_config = config.get("checkpoint", {}) self.checkpoint_enabled = checkpoint_config.get("enabled", False) - self.checkpoint_interval = checkpoint_config.get( - "interval", DEFAULT_CHECKPOINT_INTERVAL - ) + self.checkpoint_interval = checkpoint_config.get("interval", DEFAULT_CHECKPOINT_INTERVAL) # llms.txt detection state skip_llms_txt_value = config.get("skip_llms_txt", False) @@ -322,9 +318,7 @@ class DocToSkillConverter: for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]): text = self.clean_text(h.get_text()) if text: - page["headings"].append( - {"level": h.name, "text": text, "id": h.get("id", "")} - ) + page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")}) # Extract code with language detection code_selector = selectors.get("code_blocks", "pre code") @@ -391,9 +385,7 @@ class DocToSkillConverter: import re # Detect if content is actually HTML (some .md URLs return HTML) - if content.strip().startswith(" 10: - page["code_samples"].append( - {"code": code.strip(), "language": lang or "unknown"} - ) + page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"}) # Extract content (paragraphs) content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL) @@ -458,11 +448,7 @@ class DocToSkillConverter: # Strip anchor fragments full_url = full_url.split("#")[0] # Only include .md URLs to avoid client-side rendered HTML pages - if ( - ".md" in full_url - and self.is_valid_url(full_url) - and full_url not in page["links"] - ): + if ".md" in full_url and self.is_valid_url(full_url) and full_url not in page["links"]: page["links"].append(full_url) return page @@ -526,18 +512,14 @@ class DocToSkillConverter: for h in main.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]): text = self.clean_text(h.get_text()) if text: - page["headings"].append( - {"level": h.name, "text": text, "id": h.get("id", "")} - ) + page["headings"].append({"level": h.name, "text": text, "id": h.get("id", "")}) # Extract code blocks for code_elem in main.select("pre code, pre"): code = code_elem.get_text() if len(code.strip()) > 10: lang = self.detect_language(code_elem, code) - page["code_samples"].append( - {"code": code.strip(), "language": lang} - ) + page["code_samples"].append({"code": code.strip(), "language": lang}) # Extract paragraphs paragraphs = [] @@ -558,9 +540,7 @@ class DocToSkillConverter: # Log low-confidence detections for debugging if confidence < 0.5: - logger.debug( - f"Low confidence language detection: {lang} ({confidence:.2f})" - ) + logger.debug(f"Low confidence language detection: {lang} ({confidence:.2f})") return lang # Return string for backward compatibility @@ -573,10 +553,7 @@ class DocToSkillConverter: # Look for "Example:" or "Pattern:" sections for elem in main.find_all(["p", "div"]): text = elem.get_text().lower() - if any( - word in text - for word in ["example:", "pattern:", "usage:", "typical use"] - ): + if any(word in text for word in ["example:", "pattern:", "usage:", "typical use"]): # Get the code that follows next_code = elem.find_next(["pre", "code"]) if next_code: @@ -598,9 +575,7 @@ class DocToSkillConverter: """Save page data (skip pages with empty content)""" # Skip pages with empty or very short content if not page.get("content") or len(page.get("content", "")) < 50: - logger.debug( - "Skipping page with empty/short content: %s", page.get("url", "unknown") - ) + logger.debug("Skipping page with empty/short content: %s", page.get("url", "unknown")) return url_hash = hashlib.md5(page["url"].encode()).hexdigest()[:10] @@ -648,10 +623,7 @@ class DocToSkillConverter: # Add new URLs for link in page["links"]: - if ( - link not in self.visited_urls - and link not in self.pending_urls - ): + if link not in self.visited_urls and link not in self.pending_urls: self.pending_urls.append(link) else: # Single-threaded mode (no lock needed) @@ -672,9 +644,7 @@ class DocToSkillConverter: except Exception as e: if self.workers > 1: with self.lock: - logger.error( - " āœ— Error scraping %s: %s: %s", url, type(e).__name__, e - ) + logger.error(" āœ— Error scraping %s: %s: %s", url, type(e).__name__, e) else: logger.error(" āœ— Error scraping page: %s: %s", type(e).__name__, e) logger.error(" URL: %s", url) @@ -792,9 +762,7 @@ class DocToSkillConverter: # Check for explicit config URL first explicit_url = self.config.get("llms_txt_url") if explicit_url: - logger.info( - "\nšŸ“Œ Using explicit llms_txt_url from config: %s", explicit_url - ) + logger.info("\nšŸ“Œ Using explicit llms_txt_url from config: %s", explicit_url) # Download explicit file first downloader = LlmsTxtDownloader(explicit_url) @@ -915,9 +883,7 @@ class DocToSkillConverter: logger.info(" āœ“ %s (%d chars)", filename, len(content)) if not downloaded: - logger.warning( - "āš ļø Failed to download any variants, falling back to HTML scraping" - ) + logger.warning("āš ļø Failed to download any variants, falling back to HTML scraping") return False # Save ALL variants to references/ @@ -1032,9 +998,7 @@ class DocToSkillConverter: # Single-threaded mode (original sequential logic) if self.workers <= 1: - while self.pending_urls and ( - unlimited or len(self.visited_urls) < preview_limit - ): + while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit): url = self.pending_urls.popleft() if url in self.visited_urls: @@ -1046,9 +1010,7 @@ class DocToSkillConverter: # Just show what would be scraped logger.info(" [Preview] %s", url) try: - headers = { - "User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)" - } + headers = {"User-Agent": "Mozilla/5.0 (Documentation Scraper - Dry Run)"} response = requests.get(url, headers=headers, timeout=10) soup = BeautifulSoup(response.content, "html.parser") @@ -1060,16 +1022,11 @@ class DocToSkillConverter: if main: for link in main.find_all("a", href=True): href = urljoin(url, link["href"]) - if ( - self.is_valid_url(href) - and href not in self.visited_urls - ): + if self.is_valid_url(href) and href not in self.visited_urls: self.pending_urls.append(href) except Exception as e: # Failed to extract links in fast mode, continue anyway - logger.warning( - "āš ļø Warning: Could not extract links from %s: %s", url, e - ) + logger.warning("āš ļø Warning: Could not extract links from %s: %s", url, e) else: self.scrape_page(url) self.pages_scraped += 1 @@ -1092,9 +1049,7 @@ class DocToSkillConverter: with ThreadPoolExecutor(max_workers=self.workers) as executor: futures = [] - while self.pending_urls and ( - unlimited or len(self.visited_urls) < preview_limit - ): + while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit): # Get next batch of URLs (thread-safe) batch = [] batch_size = min(self.workers * 2, len(self.pending_urls)) @@ -1152,9 +1107,7 @@ class DocToSkillConverter: self.pages_scraped += 1 if self.dry_run: - logger.info( - "\nāœ… Dry run complete: would scrape ~%d pages", len(self.visited_urls) - ) + logger.info("\nāœ… Dry run complete: would scrape ~%d pages", len(self.visited_urls)) if len(self.visited_urls) >= preview_limit: logger.info( " (showing first %d, actual scraping may find more)", @@ -1221,9 +1174,7 @@ class DocToSkillConverter: ) as client: tasks = [] - while self.pending_urls and ( - unlimited or len(self.visited_urls) < preview_limit - ): + while self.pending_urls and (unlimited or len(self.visited_urls) < preview_limit): # Get next batch of URLs batch = [] batch_size = min(self.workers * 2, len(self.pending_urls)) @@ -1271,9 +1222,7 @@ class DocToSkillConverter: await asyncio.gather(*tasks, return_exceptions=True) if self.dry_run: - logger.info( - "\nāœ… Dry run complete: would scrape ~%d pages", len(self.visited_urls) - ) + logger.info("\nāœ… Dry run complete: would scrape ~%d pages", len(self.visited_urls)) if len(self.visited_urls) >= preview_limit: logger.info( " (showing first %d, actual scraping may find more)", @@ -1323,9 +1272,7 @@ class DocToSkillConverter: return pages - def smart_categorize( - self, pages: list[dict[str, Any]] - ) -> dict[str, list[dict[str, Any]]]: + def smart_categorize(self, pages: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]: """Improved categorization with better pattern matching""" category_defs = self.config.get("categories", {}) @@ -1377,18 +1324,14 @@ class DocToSkillConverter: for page in pages: path = urlparse(page["url"]).path segments = [ - s - for s in path.split("/") - if s and s not in ["en", "stable", "latest", "docs"] + s for s in path.split("/") if s and s not in ["en", "stable", "latest", "docs"] ] for seg in segments: url_segments[seg] += 1 # Top segments become categories - top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[ - :8 - ] + top_segments = sorted(url_segments.items(), key=lambda x: x[1], reverse=True)[:8] categories = {} for seg, count in top_segments: @@ -1408,9 +1351,7 @@ class DocToSkillConverter: return categories - def generate_quick_reference( - self, pages: list[dict[str, Any]] - ) -> list[dict[str, str]]: + def generate_quick_reference(self, pages: list[dict[str, Any]]) -> list[dict[str, str]]: """Generate quick reference from common patterns (NEW FEATURE)""" quick_ref = [] @@ -1492,9 +1433,7 @@ class DocToSkillConverter: if pages: first_page_html = pages[0].get("raw_html", "") break - description = infer_description_from_docs( - self.base_url, first_page_html, self.name - ) + description = infer_description_from_docs(self.base_url, first_page_html, self.name) else: description = self.config["description"] @@ -1502,9 +1441,7 @@ class DocToSkillConverter: example_codes = [] for pages in categories.values(): for page in pages[:3]: # First 3 pages per category - for sample in page.get("code_samples", [])[ - :2 - ]: # First 2 samples per page + for sample in page.get("code_samples", [])[:2]: # First 2 samples per page code = sample.get("code", sample if isinstance(sample, str) else "") lang = sample.get("language", "unknown") if len(code) < 200 and lang != "unknown": @@ -1554,9 +1491,7 @@ This skill should be triggered when: content += pattern.get("code", "")[:300] content += "\n```\n\n" else: - content += ( - "*Quick reference patterns will be added as you use the skill.*\n\n" - ) + content += "*Quick reference patterns will be added as you use the skill.*\n\n" # Add example codes from docs if example_codes: @@ -1571,9 +1506,7 @@ This skill includes comprehensive documentation in `references/`: """ for cat in sorted(categories.keys()): - content += ( - f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n" - ) + content += f"- **{cat}.md** - {cat.replace('_', ' ').title()} documentation\n" content += """ Use `view` to read specific reference files when detailed information is needed. @@ -1721,9 +1654,7 @@ def validate_config(config: dict[str, Any]) -> tuple[list[str], list[str]]: ) # Validate base_url - if "base_url" in config and not config["base_url"].startswith( - ("http://", "https://") - ): + if "base_url" in config and not config["base_url"].startswith(("http://", "https://")): errors.append( f"Invalid base_url: '{config['base_url']}' (must start with http:// or https://)" ) @@ -1840,18 +1771,12 @@ def load_config(config_path: str) -> dict[str, Any]: except json.JSONDecodeError as e: logger.error("āŒ Error: Invalid JSON in config file: %s", config_path) logger.error(" Details: %s", e) - logger.error( - " Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno - ) + logger.error(" Suggestion: Check syntax at line %d, column %d", e.lineno, e.colno) sys.exit(1) except FileNotFoundError: logger.error("āŒ Error: Config file not found: %s", config_path) - logger.error( - " Suggestion: Create a config file or use an existing one from configs/" - ) - logger.error( - " Available configs: react.json, vue.json, django.json, godot.json" - ) + logger.error(" Suggestion: Create a config file or use an existing one from configs/") + logger.error(" Available configs: react.json, vue.json, django.json, godot.json") sys.exit(1) # Validate config @@ -1869,9 +1794,7 @@ def load_config(config_path: str) -> dict[str, Any]: logger.error("āŒ Configuration validation errors in %s:", config_path) for error in errors: logger.error(" - %s", error) - logger.error( - "\n Suggestion: Fix the above errors or check configs/ for working examples" - ) + logger.error("\n Suggestion: Fix the above errors or check configs/ for working examples") sys.exit(1) return config @@ -2025,9 +1948,7 @@ def setup_argument_parser() -> argparse.ArgumentParser: action="store_true", help="Resume from last checkpoint (for interrupted scrapes)", ) - parser.add_argument( - "--fresh", action="store_true", help="Clear checkpoint and start fresh" - ) + parser.add_argument("--fresh", action="store_true", help="Clear checkpoint and start fresh") parser.add_argument( "--rate-limit", "-r", @@ -2126,15 +2047,11 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]: if args.workers: # Validate workers count if args.workers < 1: - logger.error( - "āŒ Error: --workers must be at least 1 (got %d)", args.workers - ) + logger.error("āŒ Error: --workers must be at least 1 (got %d)", args.workers) logger.error(" Suggestion: Use --workers 1 (default) or omit the flag") sys.exit(1) if args.workers > 10: - logger.warning( - "āš ļø Warning: --workers capped at 10 (requested %d)", args.workers - ) + logger.warning("āš ļø Warning: --workers capped at 10 (requested %d)", args.workers) args.workers = 10 config["workers"] = args.workers if args.workers > 1: @@ -2336,9 +2253,7 @@ def execute_enhancement(config: dict[str, Any], args: argparse.Namespace) -> Non # Suggest enhancement if not done if not args.enhance and not args.enhance_local: logger.info("\nšŸ’” Optional: Enhance SKILL.md with Claude:") - logger.info( - " Local (recommended): skill-seekers-enhance output/%s/", config["name"] - ) + logger.info(" Local (recommended): skill-seekers-enhance output/%s/", config["name"]) logger.info(" or re-run with: --enhance-local") logger.info( " API-based: skill-seekers-enhance-api output/%s/", diff --git a/src/skill_seekers/cli/how_to_guide_builder.py b/src/skill_seekers/cli/how_to_guide_builder.py index 070e7b8..a311881 100644 --- a/src/skill_seekers/cli/how_to_guide_builder.py +++ b/src/skill_seekers/cli/how_to_guide_builder.py @@ -79,9 +79,7 @@ class WorkflowStep: setup_required: str | None = None explanation: str | None = None # Why this step matters common_pitfall: str | None = None # Warning for this step - common_variations: list[str] = field( - default_factory=list - ) # AI: Alternative approaches + common_variations: list[str] = field(default_factory=list) # AI: Alternative approaches @dataclass @@ -223,9 +221,7 @@ class WorkflowAnalyzer: # Check if next statement is assertion (verification) idx = statements.index(stmt) verification = None - if idx + 1 < len(statements) and isinstance( - statements[idx + 1], ast.Assert - ): + if idx + 1 < len(statements) and isinstance(statements[idx + 1], ast.Assert): verification = ast.get_source_segment(code, statements[idx + 1]) steps.append( @@ -244,9 +240,7 @@ class WorkflowAnalyzer: return steps - def _extract_steps_heuristic( - self, code: str, _workflow: dict - ) -> list[WorkflowStep]: + def _extract_steps_heuristic(self, code: str, _workflow: dict) -> list[WorkflowStep]: """Extract steps using heuristics (for non-Python or invalid syntax)""" steps = [] lines = code.split("\n") @@ -282,9 +276,7 @@ class WorkflowAnalyzer: step_code = "\n".join(current_step) description = self._infer_description_from_code(step_code) steps.append( - WorkflowStep( - step_number=step_num, code=step_code, description=description - ) + WorkflowStep(step_number=step_num, code=step_code, description=description) ) return steps @@ -454,9 +446,7 @@ class WorkflowGrouper: groups = self._group_by_file_path(workflows) return groups - def _group_by_ai_tutorial_group( - self, workflows: list[dict] - ) -> dict[str, list[dict]]: + def _group_by_ai_tutorial_group(self, workflows: list[dict]) -> dict[str, list[dict]]: """Group by AI-generated tutorial_group (from C3.6 enhancement)""" groups = defaultdict(list) ungrouped = [] @@ -914,9 +904,7 @@ class HowToGuideBuilder: """Filter to workflow category only""" return [ex for ex in examples if ex.get("category") == "workflow"] - def _create_guide( - self, title: str, workflows: list[dict], enhancer=None - ) -> HowToGuide: + def _create_guide(self, title: str, workflows: list[dict], enhancer=None) -> HowToGuide: """ Generate single guide from workflow(s). @@ -974,18 +962,14 @@ class HowToGuideBuilder: # Add AI enhancements if enhancer is available if enhancer: - self._enhance_guide_with_ai( - guide, primary_workflow.get("ai_analysis", {}), enhancer - ) + self._enhance_guide_with_ai(guide, primary_workflow.get("ai_analysis", {}), enhancer) elif self.enhance_with_ai and primary_workflow.get("ai_analysis"): # Fallback to old enhancement method (basic) self._enhance_guide_with_ai_basic(guide, primary_workflow["ai_analysis"]) return guide - def _generate_overview( - self, primary_workflow: dict, _all_workflows: list[dict] - ) -> str: + def _generate_overview(self, primary_workflow: dict, _all_workflows: list[dict]) -> str: """Generate guide overview""" # Try to get explanation from AI analysis if primary_workflow.get("ai_analysis"): @@ -1019,10 +1003,7 @@ class HowToGuideBuilder: # Prepare guide data for enhancer guide_data = { "title": guide.title, - "steps": [ - {"description": step.description, "code": step.code} - for step in guide.steps - ], + "steps": [{"description": step.description, "code": step.code} for step in guide.steps], "language": "python", # TODO: Detect from code "prerequisites": guide.prerequisites, "description": guide.overview, @@ -1055,9 +1036,7 @@ class HowToGuideBuilder: if "use_cases" in enhanced_data: guide.use_cases = enhanced_data["use_cases"] - logger.info( - f"✨ Enhanced guide '{guide.title}' with comprehensive AI improvements" - ) + logger.info(f"✨ Enhanced guide '{guide.title}' with comprehensive AI improvements") def _enhance_guide_with_ai_basic(self, guide: HowToGuide, ai_analysis: dict): """ @@ -1122,9 +1101,7 @@ class HowToGuideBuilder: for guide in guides: # Generate filename from title - filename = ( - guide.title.lower().replace(" ", "-").replace(":", "") + ".md" - ) + filename = guide.title.lower().replace(" ", "-").replace(":", "") + ".md" file_path = use_case_dir / filename # Generate and save markdown @@ -1135,9 +1112,7 @@ class HowToGuideBuilder: index_markdown = self.generator.generate_index(collection.guides) (output_dir / "index.md").write_text(index_markdown, encoding="utf-8") - logger.info( - f"āœ… Saved {collection.total_guides} guides + index to {output_dir}" - ) + logger.info(f"āœ… Saved {collection.total_guides} guides + index to {output_dir}") # ============================================================================ @@ -1244,9 +1219,7 @@ Grouping Strategies: # Extract from directory using test example extractor print("āš ļø Directory input requires test example extractor") print(" Please use test_examples.json output from C3.2") - print( - f" Or run: skill-seekers extract-test-examples {input_path} --json > examples.json" - ) + print(f" Or run: skill-seekers extract-test-examples {input_path} --json > examples.json") sys.exit(1) else: diff --git a/src/skill_seekers/cli/llms_txt_parser.py b/src/skill_seekers/cli/llms_txt_parser.py index 21b1612..26997a6 100644 --- a/src/skill_seekers/cli/llms_txt_parser.py +++ b/src/skill_seekers/cli/llms_txt_parser.py @@ -127,9 +127,7 @@ class LlmsTxtParser: # Extract code blocks code_blocks = re.findall(r"```(\w+)?\n(.*?)```", content, re.DOTALL) for lang, code in code_blocks: - page["code_samples"].append( - {"code": code.strip(), "language": lang or "unknown"} - ) + page["code_samples"].append({"code": code.strip(), "language": lang or "unknown"}) # Extract h2/h3 headings headings = re.findall(r"^(#{2,3})\s+(.+)$", content, re.MULTILINE) @@ -146,9 +144,7 @@ class LlmsTxtParser: content_no_code = re.sub(r"```.*?```", "", content, flags=re.DOTALL) # Extract paragraphs - paragraphs = [ - p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20 - ] + paragraphs = [p.strip() for p in content_no_code.split("\n\n") if len(p.strip()) > 20] page["content"] = "\n\n".join(paragraphs) return page diff --git a/src/skill_seekers/cli/pattern_recognizer.py b/src/skill_seekers/cli/pattern_recognizer.py index e827e59..518569c 100644 --- a/src/skill_seekers/cli/pattern_recognizer.py +++ b/src/skill_seekers/cli/pattern_recognizer.py @@ -237,9 +237,7 @@ class PatternRecognizer: self.detectors.append(TemplateMethodDetector(self.depth)) self.detectors.append(ChainOfResponsibilityDetector(self.depth)) - def analyze_file( - self, file_path: str, content: str, language: str - ) -> PatternReport: + def analyze_file(self, file_path: str, content: str, language: str) -> PatternReport: """ Analyze a single file for design patterns. @@ -581,9 +579,7 @@ class FactoryDetector(BasePatternDetector): # Check if multiple factory methods exist (Abstract Factory pattern) if len(factory_methods) >= 2: - evidence.append( - f"Multiple factory methods: {', '.join(factory_methods[:3])}" - ) + evidence.append(f"Multiple factory methods: {', '.join(factory_methods[:3])}") confidence += 0.2 # Check for inheritance (factory hierarchy) @@ -800,35 +796,25 @@ class StrategyDetector(BasePatternDetector): ] if siblings: - evidence.append( - f"Part of strategy family with: {', '.join(siblings[:3])}" - ) + evidence.append(f"Part of strategy family with: {', '.join(siblings[:3])}") confidence += 0.5 - if base_class and ( - "strategy" in base_class.lower() or "policy" in base_class.lower() - ): + if base_class and ("strategy" in base_class.lower() or "policy" in base_class.lower()): evidence.append(f"Inherits from strategy base: {base_class}") confidence += 0.3 # Check if this is a strategy base class # (has subclasses in same file) - subclasses = [ - cls.name for cls in all_classes if class_sig.name in cls.base_classes - ] + subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes] if len(subclasses) >= 2: - evidence.append( - f"Strategy base with implementations: {', '.join(subclasses[:3])}" - ) + evidence.append(f"Strategy base with implementations: {', '.join(subclasses[:3])}") confidence += 0.6 # Check for single dominant method (strategy interface) if len(class_sig.methods) == 1 or len(class_sig.methods) == 2: # Single method or method + __init__ - main_method = [ - m for m in class_sig.methods if m.name not in ["__init__", "__new__"] - ] + main_method = [m for m in class_sig.methods if m.name not in ["__init__", "__new__"]] if main_method: evidence.append(f"Strategy interface method: {main_method[0].name}") confidence += 0.2 @@ -939,8 +925,7 @@ class DecoratorDetector(BasePatternDetector): if init_method and len(init_method.parameters) > 1: # More than just 'self' param_names = [p.name for p in init_method.parameters if p.name != "self"] if any( - name in ["wrapped", "component", "inner", "obj", "target"] - for name in param_names + name in ["wrapped", "component", "inner", "obj", "target"] for name in param_names ): evidence.append(f"Takes wrapped object in constructor: {param_names}") confidence += 0.4 @@ -1298,9 +1283,7 @@ class TemplateMethodDetector(BasePatternDetector): class_lower = class_sig.name.lower() if any(keyword in class_lower for keyword in template_keywords): # Check if has subclasses - subclasses = [ - cls.name for cls in all_classes if class_sig.name in cls.base_classes - ] + subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes] if subclasses: return PatternInstance( @@ -1310,9 +1293,7 @@ class TemplateMethodDetector(BasePatternDetector): location="", class_name=class_sig.name, line_number=class_sig.line_number, - evidence=[ - f"Abstract base with subclasses: {', '.join(subclasses[:2])}" - ], + evidence=[f"Abstract base with subclasses: {', '.join(subclasses[:2])}"], related_classes=subclasses, ) @@ -1329,9 +1310,7 @@ class TemplateMethodDetector(BasePatternDetector): # 3. Has template method that orchestrates # Check for subclasses - subclasses = [ - cls.name for cls in all_classes if class_sig.name in cls.base_classes - ] + subclasses = [cls.name for cls in all_classes if class_sig.name in cls.base_classes] if len(subclasses) >= 1: evidence.append(f"Base class with {len(subclasses)} implementations") @@ -1467,8 +1446,7 @@ class ChainOfResponsibilityDetector(BasePatternDetector): # Check for set_next() method has_set_next = any( - "next" in m.name.lower() - and ("set" in m.name.lower() or "add" in m.name.lower()) + "next" in m.name.lower() and ("set" in m.name.lower() or "add" in m.name.lower()) for m in class_sig.methods ) @@ -1489,9 +1467,7 @@ class ChainOfResponsibilityDetector(BasePatternDetector): ] if siblings and has_next_ref: - evidence.append( - f"Part of handler chain with: {', '.join(siblings[:2])}" - ) + evidence.append(f"Part of handler chain with: {', '.join(siblings[:2])}") confidence += 0.2 if confidence >= 0.5: @@ -1590,9 +1566,7 @@ class LanguageAdapter: pattern.evidence.append("Abstract Factory pattern") # Template Method: Abstract classes common - elif ( - pattern.pattern_type == "TemplateMethod" and "abstract" in evidence_str - ): + elif pattern.pattern_type == "TemplateMethod" and "abstract" in evidence_str: pattern.confidence = min(pattern.confidence + 0.1, 1.0) # Go adaptations @@ -1645,9 +1619,7 @@ class LanguageAdapter: pattern.evidence.append("Ruby Singleton module") # Builder: Method chaining is idiomatic - elif ( - pattern.pattern_type == "Builder" and "method chaining" in evidence_str - ): + elif pattern.pattern_type == "Builder" and "method chaining" in evidence_str: pattern.confidence = min(pattern.confidence + 0.05, 1.0) # PHP adaptations @@ -1702,9 +1674,7 @@ Supported Languages: action="append", help="Source file to analyze (can be specified multiple times)", ) - parser.add_argument( - "--directory", help="Directory to analyze (analyzes all source files)" - ) + parser.add_argument("--directory", help="Directory to analyze (analyzes all source files)") parser.add_argument( "--output", help="Output directory for results (default: current directory)" ) diff --git a/src/skill_seekers/cli/test_example_extractor.py b/src/skill_seekers/cli/test_example_extractor.py index 0ec427c..7baebf2 100644 --- a/src/skill_seekers/cli/test_example_extractor.py +++ b/src/skill_seekers/cli/test_example_extractor.py @@ -194,15 +194,11 @@ class PythonTestAnalyzer: for node in ast.walk(tree): if isinstance(node, ast.ClassDef): if self._is_test_class(node): - examples.extend( - self._extract_from_test_class(node, file_path, imports) - ) + examples.extend(self._extract_from_test_class(node, file_path, imports)) # Find test functions (pytest) elif isinstance(node, ast.FunctionDef) and self._is_test_function(node): - examples.extend( - self._extract_from_test_function(node, file_path, imports) - ) + examples.extend(self._extract_from_test_function(node, file_path, imports)) return examples @@ -236,9 +232,7 @@ class PythonTestAnalyzer: return True # Has @pytest.mark decorator for decorator in node.decorator_list: - if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse( - decorator - ): + if isinstance(decorator, ast.Attribute) and "pytest" in ast.unparse(decorator): return True return False @@ -255,9 +249,7 @@ class PythonTestAnalyzer: for node in class_node.body: if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"): examples.extend( - self._analyze_test_body( - node, file_path, imports, setup_code=setup_code - ) + self._analyze_test_body(node, file_path, imports, setup_code=setup_code) ) return examples @@ -269,9 +261,7 @@ class PythonTestAnalyzer: # Check for fixture parameters fixture_setup = self._extract_fixtures(func_node) - return self._analyze_test_body( - func_node, file_path, imports, setup_code=fixture_setup - ) + return self._analyze_test_body(func_node, file_path, imports, setup_code=fixture_setup) def _extract_setup_method(self, class_node: ast.ClassDef) -> str | None: """Extract setUp method code""" @@ -328,9 +318,7 @@ class PythonTestAnalyzer: examples.extend(configs) # 4. Multi-step workflows (integration tests) - workflows = self._find_workflows( - func_node, file_path, docstring, setup_code, tags, imports - ) + workflows = self._find_workflows(func_node, file_path, docstring, setup_code, tags, imports) examples.extend(workflows) return examples @@ -491,9 +479,7 @@ class PythonTestAnalyzer: code=code, language="Python", description=f"Configuration example: {description}", - expected_behavior=self._extract_assertion_after( - func_node, node - ), + expected_behavior=self._extract_assertion_after(func_node, node), setup_code=setup_code, file_path=file_path, line_start=node.lineno, @@ -594,9 +580,7 @@ class PythonTestAnalyzer: integration_keywords = ["workflow", "integration", "end_to_end", "e2e", "full"] return any(keyword in test_name for keyword in integration_keywords) - def _extract_assertion_after( - self, func_node: ast.FunctionDef, target_node: ast.AST - ) -> str: + def _extract_assertion_after(self, func_node: ast.FunctionDef, target_node: ast.AST) -> str: """Find assertion that follows the target node""" found_target = False for stmt in func_node.body: @@ -727,8 +711,7 @@ class GenericTestAnalyzer: code=config_match.group(0), language=language, file_path=file_path, - line_number=code[: start_pos + config_match.start()].count("\n") - + 1, + line_number=code[: start_pos + config_match.start()].count("\n") + 1, ) examples.append(example) @@ -871,9 +854,7 @@ class TestExampleExtractor: logger.warning(f"āš ļø Failed to initialize AI enhancer: {e}") self.enhance_with_ai = False - def extract_from_directory( - self, directory: Path, recursive: bool = True - ) -> ExampleReport: + def extract_from_directory(self, directory: Path, recursive: bool = True) -> ExampleReport: """Extract examples from all test files in directory""" directory = Path(directory) @@ -927,13 +908,11 @@ class TestExampleExtractor: # Limit per file if len(filtered_examples) > self.max_per_file: # Sort by confidence and take top N - filtered_examples = sorted( - filtered_examples, key=lambda x: x.confidence, reverse=True - )[: self.max_per_file] + filtered_examples = sorted(filtered_examples, key=lambda x: x.confidence, reverse=True)[ + : self.max_per_file + ] - logger.info( - f"Extracted {len(filtered_examples)} examples from {file_path.name}" - ) + logger.info(f"Extracted {len(filtered_examples)} examples from {file_path.name}") return filtered_examples @@ -988,9 +967,7 @@ class TestExampleExtractor: # Calculate averages avg_complexity = ( - sum(ex.complexity_score for ex in examples) / len(examples) - if examples - else 0.0 + sum(ex.complexity_score for ex in examples) / len(examples) if examples else 0.0 ) high_value_count = sum(1 for ex in examples if ex.confidence > 0.7) @@ -1050,9 +1027,7 @@ Examples: help="Maximum examples per file (default: 10)", ) parser.add_argument("--json", action="store_true", help="Output JSON format") - parser.add_argument( - "--markdown", action="store_true", help="Output Markdown format" - ) + parser.add_argument("--markdown", action="store_true", help="Output Markdown format") parser.add_argument( "--recursive", action="store_true", @@ -1079,9 +1054,7 @@ Examples: examples = extractor.extract_from_file(Path(args.file)) report = extractor._create_report(examples, file_path=args.file) else: - report = extractor.extract_from_directory( - Path(args.directory), recursive=args.recursive - ) + report = extractor.extract_from_directory(Path(args.directory), recursive=args.recursive) # Output results if args.json: diff --git a/src/skill_seekers/cli/unified_codebase_analyzer.py b/src/skill_seekers/cli/unified_codebase_analyzer.py index 8876531..168ef2b 100644 --- a/src/skill_seekers/cli/unified_codebase_analyzer.py +++ b/src/skill_seekers/cli/unified_codebase_analyzer.py @@ -124,9 +124,7 @@ class UnifiedCodebaseAnalyzer: AnalysisResult with all 3 streams """ # Use three-stream fetcher - fetcher = GitHubThreeStreamFetcher( - repo_url, self.github_token, interactive=interactive - ) + fetcher = GitHubThreeStreamFetcher(repo_url, self.github_token, interactive=interactive) three_streams = fetcher.fetch(output_dir) # Analyze code with specified depth @@ -245,9 +243,7 @@ class UnifiedCodebaseAnalyzer: basic = self.basic_analysis(directory) # Run full C3.x analysis using existing codebase_scraper - print( - "šŸ” Running C3.x components (patterns, examples, guides, configs, architecture)..." - ) + print("šŸ” Running C3.x components (patterns, examples, guides, configs, architecture)...") try: # Import codebase analyzer @@ -282,19 +278,11 @@ class UnifiedCodebaseAnalyzer: c3x = {**basic, "analysis_type": "c3x", **c3x_data} print("āœ… C3.x analysis complete!") - print( - f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected" - ) - print( - f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted" - ) - print( - f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated" - ) + print(f" - {len(c3x_data.get('c3_1_patterns', []))} design patterns detected") + print(f" - {c3x_data.get('c3_2_examples_count', 0)} test examples extracted") + print(f" - {len(c3x_data.get('c3_3_guides', []))} how-to guides generated") print(f" - {len(c3x_data.get('c3_4_configs', []))} config files analyzed") - print( - f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found" - ) + print(f" - {len(c3x_data.get('c3_7_architecture', []))} architectural patterns found") return c3x @@ -451,9 +439,7 @@ class UnifiedCodebaseAnalyzer: if item.is_dir(): # Only include immediate subdirectories - structure["children"].append( - {"name": item.name, "type": "directory"} - ) + structure["children"].append({"name": item.name, "type": "directory"}) elif item.is_file(): structure["children"].append( {"name": item.name, "type": "file", "extension": item.suffix} diff --git a/tests/test_architecture_scenarios.py b/tests/test_architecture_scenarios.py index 910a767..f91d52f 100644 --- a/tests/test_architecture_scenarios.py +++ b/tests/test_architecture_scenarios.py @@ -203,15 +203,11 @@ How to use async tools. ], } - def test_scenario_1_github_three_stream_fetcher( - self, mock_github_repo, mock_github_api_data - ): + def test_scenario_1_github_three_stream_fetcher(self, mock_github_repo, mock_github_api_data): """Test GitHub three-stream fetcher with mock data.""" # Create fetcher with mock with ( - patch.object( - GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo - ), + patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo), patch.object( GitHubThreeStreamFetcher, "fetch_github_metadata", @@ -251,14 +247,10 @@ How to use async tools. assert len(three_streams.insights_stream.known_solutions) >= 1 assert len(three_streams.insights_stream.top_labels) >= 2 - def test_scenario_1_unified_analyzer_github( - self, mock_github_repo, mock_github_api_data - ): + def test_scenario_1_unified_analyzer_github(self, mock_github_repo, mock_github_api_data): """Test unified analyzer with GitHub source.""" with ( - patch.object( - GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo - ), + patch.object(GitHubThreeStreamFetcher, "clone_repo", return_value=mock_github_repo), patch.object( GitHubThreeStreamFetcher, "fetch_github_metadata", @@ -286,9 +278,7 @@ How to use async tools. {"name": "test_azure_provider", "file": "test_auth.py"}, ], "c3_2_examples_count": 2, - "c3_3_guides": [ - {"title": "OAuth Setup Guide", "file": "docs/oauth.md"} - ], + "c3_3_guides": [{"title": "OAuth Setup Guide", "file": "docs/oauth.md"}], "c3_4_configs": [], "c3_7_architecture": [ { @@ -335,9 +325,7 @@ How to use async tools. { "name": "fastmcp-oauth", "description": "OAuth authentication for FastMCP", - "categories": { - "oauth": ["oauth", "auth", "provider", "google", "azure"] - }, + "categories": {"oauth": ["oauth", "auth", "provider", "google", "azure"]}, } ) ) @@ -491,9 +479,7 @@ pip install fastmcp # Check content quality (Architecture Section 8.2) assert "Issue #42" in router_md, "Missing issue references" assert "⭐" in router_md or "Stars:" in router_md, "Missing GitHub metadata" - assert ( - "Quick Start" in router_md or "README" in router_md - ), "Missing README content" + assert "Quick Start" in router_md or "README" in router_md, "Missing README content" class TestScenario2MultiSource: @@ -617,15 +603,11 @@ class TestScenario2MultiSource: # Layer 4: GitHub insights (community knowledge) # Mock source 1 (HTML docs) - source1_data = { - "api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}] - } + source1_data = {"api": [{"name": "GoogleProvider", "params": ["app_id", "app_secret"]}]} # Mock source 2 (GitHub C3.x) source2_data = { - "api": [ - {"name": "GoogleProvider", "params": ["client_id", "client_secret"]} - ] + "api": [{"name": "GoogleProvider", "params": ["client_id", "client_secret"]}] } # Mock GitHub streams @@ -651,9 +633,7 @@ class TestScenario2MultiSource: ) # Create merger with required arguments - merger = RuleBasedMerger( - docs_data=source1_data, github_data=source2_data, conflicts=[] - ) + merger = RuleBasedMerger(docs_data=source1_data, github_data=source2_data, conflicts=[]) # Merge using merge_all() method merged = merger.merge_all() @@ -770,12 +750,8 @@ def test_connection(): mock_c3x.return_value = { "files": ["database.py", "api.py"], "analysis_type": "c3x", - "c3_1_patterns": [ - {"name": "Singleton", "count": 1, "file": "database.py"} - ], - "c3_2_examples": [ - {"name": "test_connection", "file": "test_database.py"} - ], + "c3_1_patterns": [{"name": "Singleton", "count": 1, "file": "database.py"}], + "c3_2_examples": [{"name": "test_connection", "file": "test_database.py"}], "c3_2_examples_count": 1, "c3_3_guides": [], "c3_4_configs": [], @@ -967,9 +943,7 @@ Based on analysis of GitHub issues: print(f"\nGitHub overhead: {github_overhead} lines") # Architecture target: 20-60 lines - assert ( - 20 <= github_overhead <= 60 - ), f"GitHub overhead {github_overhead} not in range 20-60" + assert 20 <= github_overhead <= 60, f"GitHub overhead {github_overhead} not in range 20-60" def test_router_size_within_limits(self): """Test router size is 150±20 lines (Architecture Section 8.1, Line 1970).""" @@ -977,9 +951,7 @@ Based on analysis of GitHub issues: router_lines = 150 # Simulated count # Architecture target: 150 lines (±20) - assert ( - 130 <= router_lines <= 170 - ), f"Router size {router_lines} not in range 130-170" + assert 130 <= router_lines <= 170, f"Router size {router_lines} not in range 130-170" def test_content_quality_requirements(self): """Test content quality (Architecture Section 8.2, Lines 1977-2014).""" @@ -1021,9 +993,9 @@ provider = GitHubProvider(client_id="...", client_secret="...") # Check minimum 3 code examples code_blocks = sub_skill_md.count("```") - assert ( - code_blocks >= 6 - ), f"Need at least 3 code examples (6 markers), found {code_blocks // 2}" + assert code_blocks >= 6, ( + f"Need at least 3 code examples (6 markers), found {code_blocks // 2}" + ) # Check language tags assert "```python" in sub_skill_md, "Code blocks must have language tags" @@ -1038,9 +1010,9 @@ provider = GitHubProvider(client_id="...", client_secret="...") # Check solution indicators for closed issues if "closed" in sub_skill_md.lower(): - assert ( - "āœ…" in sub_skill_md or "Solution" in sub_skill_md - ), "Closed issues should indicate solution found" + assert "āœ…" in sub_skill_md or "Solution" in sub_skill_md, ( + "Closed issues should indicate solution found" + ) class TestTokenEfficiencyCalculation: @@ -1077,9 +1049,9 @@ class TestTokenEfficiencyCalculation: # With selective loading and caching, achieve 35-40% # Even conservative estimate shows 29.5%, actual usage patterns show 35-40% - assert ( - reduction_percent >= 29 - ), f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)" + assert reduction_percent >= 29, ( + f"Token reduction {reduction_percent:.1f}% below 29% (conservative target)" + ) if __name__ == "__main__": diff --git a/tests/test_async_scraping.py b/tests/test_async_scraping.py index 51f0e1f..7d1da91 100644 --- a/tests/test_async_scraping.py +++ b/tests/test_async_scraping.py @@ -103,9 +103,7 @@ class TestAsyncScrapeMethods(unittest.TestCase): os.chdir(tmpdir) converter = DocToSkillConverter(config, dry_run=True) self.assertTrue(hasattr(converter, "scrape_page_async")) - self.assertTrue( - asyncio.iscoroutinefunction(converter.scrape_page_async) - ) + self.assertTrue(asyncio.iscoroutinefunction(converter.scrape_page_async)) finally: os.chdir(self.original_cwd) @@ -263,9 +261,7 @@ class TestAsyncErrorHandling(unittest.TestCase): async with httpx.AsyncClient() as client: # Mock client.get to raise exception - with patch.object( - client, "get", side_effect=httpx.HTTPError("Test error") - ): + with patch.object(client, "get", side_effect=httpx.HTTPError("Test error")): # Should not raise exception, just log error await converter.scrape_page_async( "https://example.com/test", semaphore, client diff --git a/tests/test_github_scraper.py b/tests/test_github_scraper.py index 032a3f9..149e171 100644 --- a/tests/test_github_scraper.py +++ b/tests/test_github_scraper.py @@ -134,9 +134,7 @@ class TestREADMEExtraction(unittest.TestCase): scraper._extract_readme() self.assertIn("readme", scraper.extracted_data) - self.assertEqual( - scraper.extracted_data["readme"], "# React\n\nA JavaScript library" - ) + self.assertEqual(scraper.extracted_data["readme"], "# React\n\nA JavaScript library") def test_extract_readme_tries_multiple_locations(self): """Test that README extraction tries multiple file locations""" @@ -477,15 +475,9 @@ class TestReleasesExtraction(unittest.TestCase): mock_release1.prerelease = False mock_release1.created_at = datetime(2023, 3, 1) mock_release1.published_at = datetime(2023, 3, 1) - mock_release1.html_url = ( - "https://github.com/facebook/react/releases/tag/v18.0.0" - ) - mock_release1.tarball_url = ( - "https://github.com/facebook/react/archive/v18.0.0.tar.gz" - ) - mock_release1.zipball_url = ( - "https://github.com/facebook/react/archive/v18.0.0.zip" - ) + mock_release1.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0" + mock_release1.tarball_url = "https://github.com/facebook/react/archive/v18.0.0.tar.gz" + mock_release1.zipball_url = "https://github.com/facebook/react/archive/v18.0.0.zip" mock_release2 = Mock() mock_release2.tag_name = "v18.0.0-rc.0" @@ -495,15 +487,9 @@ class TestReleasesExtraction(unittest.TestCase): mock_release2.prerelease = True mock_release2.created_at = datetime(2023, 2, 1) mock_release2.published_at = datetime(2023, 2, 1) - mock_release2.html_url = ( - "https://github.com/facebook/react/releases/tag/v18.0.0-rc.0" - ) - mock_release2.tarball_url = ( - "https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz" - ) - mock_release2.zipball_url = ( - "https://github.com/facebook/react/archive/v18.0.0-rc.0.zip" - ) + mock_release2.html_url = "https://github.com/facebook/react/releases/tag/v18.0.0-rc.0" + mock_release2.tarball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.tar.gz" + mock_release2.zipball_url = "https://github.com/facebook/react/archive/v18.0.0-rc.0.zip" with patch("skill_seekers.cli.github_scraper.Github"): scraper = self.GitHubScraper(config) @@ -612,9 +598,7 @@ class TestGitHubToSkillConverter(unittest.TestCase): config = {"repo": "facebook/react", "name": "test", "description": "Test skill"} # Override data file path - with patch( - "skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__" - ) as mock_init: + with patch("skill_seekers.cli.github_scraper.GitHubToSkillConverter.__init__") as mock_init: mock_init.return_value = None converter = self.GitHubToSkillConverter(config) converter.data_file = str(self.data_file) @@ -1000,9 +984,7 @@ class TestErrorHandling(unittest.TestCase): with patch("skill_seekers.cli.github_scraper.Github"): scraper = self.GitHubScraper(config) scraper.repo = None - scraper.github.get_repo = Mock( - side_effect=GithubException(404, "Not found") - ) + scraper.github.get_repo = Mock(side_effect=GithubException(404, "Not found")) # Should raise ValueError with helpful message with self.assertRaises(ValueError) as context: @@ -1022,9 +1004,7 @@ class TestErrorHandling(unittest.TestCase): with patch("skill_seekers.cli.github_scraper.Github"): scraper = self.GitHubScraper(config) scraper.repo = Mock() - scraper.repo.get_issues.side_effect = GithubException( - 403, "Rate limit exceeded" - ) + scraper.repo.get_issues.side_effect = GithubException(403, "Rate limit exceeded") # Should handle gracefully and log warning scraper._extract_issues() diff --git a/tests/test_guide_enhancer.py b/tests/test_guide_enhancer.py index 07c78e4..2286cbd 100644 --- a/tests/test_guide_enhancer.py +++ b/tests/test_guide_enhancer.py @@ -31,9 +31,7 @@ class TestGuideEnhancerModeDetection: with ( patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), - patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic, + patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic, ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="auto") @@ -111,9 +109,7 @@ class TestGuideEnhancerStepDescriptions: with ( patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), - patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic, + patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic, ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") @@ -179,9 +175,7 @@ class TestGuideEnhancerTroubleshooting: with ( patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), - patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic, + patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic, ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") @@ -192,9 +186,7 @@ class TestGuideEnhancerTroubleshooting: guide_data = { "title": "Test Guide", - "steps": [ - {"description": "import requests", "code": "import requests"} - ], + "steps": [{"description": "import requests", "code": "import requests"}], "language": "python", } result = enhancer.enhance_troubleshooting(guide_data) @@ -246,9 +238,7 @@ class TestGuideEnhancerPrerequisites: with ( patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), - patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic, + patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic, ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") @@ -293,9 +283,7 @@ class TestGuideEnhancerNextSteps: with ( patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), - patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic, + patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic, ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") @@ -340,9 +328,7 @@ class TestGuideEnhancerUseCases: with ( patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), - patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic, + patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic, ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") @@ -426,9 +412,7 @@ class TestGuideEnhancerFullWorkflow: with ( patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-ant-test"}), patch("skill_seekers.cli.guide_enhancer.ANTHROPIC_AVAILABLE", True), - patch( - "skill_seekers.cli.guide_enhancer.anthropic", create=True - ) as mock_anthropic, + patch("skill_seekers.cli.guide_enhancer.anthropic", create=True) as mock_anthropic, ): mock_anthropic.Anthropic = Mock() enhancer = GuideEnhancer(mode="api") @@ -461,9 +445,7 @@ class TestGuideEnhancerFullWorkflow: """Test graceful fallback on enhancement error""" enhancer = GuideEnhancer(mode="none") - with patch.object( - enhancer, "enhance_guide", side_effect=Exception("API error") - ): + with patch.object(enhancer, "enhance_guide", side_effect=Exception("API error")): guide_data = { "title": "Test", "steps": [], @@ -533,9 +515,7 @@ class TestGuideEnhancerPromptGeneration: guide_data = { "title": "How to Test", - "steps": [ - {"description": "Write test", "code": "def test_example(): pass"} - ], + "steps": [{"description": "Write test", "code": "def test_example(): pass"}], "language": "python", "prerequisites": ["pytest"], } @@ -583,9 +563,7 @@ class TestGuideEnhancerResponseParsing: response = json.dumps( { - "step_descriptions": [ - {"step_index": 0, "explanation": "Test", "variations": []} - ], + "step_descriptions": [{"step_index": 0, "explanation": "Test", "variations": []}], "troubleshooting": [], "prerequisites_detailed": [], "next_steps": [], diff --git a/tests/test_install_agent.py b/tests/test_install_agent.py index 6d5e00b..49f80d4 100644 --- a/tests/test_install_agent.py +++ b/tests/test_install_agent.py @@ -174,9 +174,7 @@ class TestInstallToAgent: self.skill_dir.mkdir() # Create SKILL.md - (self.skill_dir / "SKILL.md").write_text( - "# Test Skill\n\nThis is a test skill." - ) + (self.skill_dir / "SKILL.md").write_text("# Test Skill\n\nThis is a test skill.") # Create references directory with files refs_dir = self.skill_dir / "references" @@ -201,9 +199,7 @@ class TestInstallToAgent: "skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path, ): - success, message = install_to_agent( - self.skill_dir, "claude", force=True - ) + success, message = install_to_agent(self.skill_dir, "claude", force=True) assert success is True target_path = agent_path / "test-skill" @@ -219,9 +215,7 @@ class TestInstallToAgent: "skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path, ): - success, message = install_to_agent( - self.skill_dir, "claude", force=True - ) + success, message = install_to_agent(self.skill_dir, "claude", force=True) assert success is True target_path = agent_path / "test-skill" @@ -246,9 +240,7 @@ class TestInstallToAgent: "skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path, ): - success, message = install_to_agent( - self.skill_dir, "claude", force=True - ) + success, message = install_to_agent(self.skill_dir, "claude", force=True) assert success is True target_path = agent_path / "test-skill" @@ -269,9 +261,7 @@ class TestInstallToAgent: "skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path, ): - success, message = install_to_agent( - self.skill_dir, "claude", force=False - ) + success, message = install_to_agent(self.skill_dir, "claude", force=False) assert success is False assert "already installed" in message.lower() @@ -289,9 +279,7 @@ class TestInstallToAgent: "skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path, ): - success, message = install_to_agent( - self.skill_dir, "claude", force=True - ) + success, message = install_to_agent(self.skill_dir, "claude", force=True) assert success is True # Old file should be gone @@ -328,9 +316,7 @@ class TestInstallToAgent: "skill_seekers.cli.install_agent.get_agent_path", return_value=agent_path, ): - success, message = install_to_agent( - self.skill_dir, "claude", dry_run=True - ) + success, message = install_to_agent(self.skill_dir, "claude", dry_run=True) assert success is True assert "DRY RUN" in message @@ -485,9 +471,7 @@ class TestInstallAgentCLI: assert exit_code == 0 # Directory should NOT be created - assert not ( - Path(agent_tmpdir) / ".claude" / "skills" / "test-skill" - ).exists() + assert not (Path(agent_tmpdir) / ".claude" / "skills" / "test-skill").exists() def test_cli_integration(self): """Test end-to-end CLI execution.""" diff --git a/tests/test_issue_219_e2e.py b/tests/test_issue_219_e2e.py index 07852b5..218c49f 100644 --- a/tests/test_issue_219_e2e.py +++ b/tests/test_issue_219_e2e.py @@ -50,9 +50,7 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase): # Mock large CHANGELOG (1.4MB, encoding="none") mock_content = Mock() mock_content.type = "file" - mock_content.encoding = ( - "none" # This is what GitHub API returns for large files - ) + mock_content.encoding = "none" # This is what GitHub API returns for large files mock_content.size = 1388271 mock_content.download_url = ( "https://raw.githubusercontent.com/ccxt/ccxt/master/CHANGELOG.md" @@ -82,9 +80,7 @@ class TestIssue219Problem1LargeFiles(unittest.TestCase): # VERIFY: CHANGELOG was extracted successfully self.assertIn("changelog", scraper.extracted_data) self.assertIn("Bug fixes", scraper.extracted_data["changelog"]) - self.assertEqual( - scraper.extracted_data["changelog"], mock_response.text - ) + self.assertEqual(scraper.extracted_data["changelog"], mock_response.text) def test_large_file_fallback_on_error(self): """E2E: Verify graceful handling if download_url fails""" @@ -184,8 +180,7 @@ class TestIssue219Problem2CLIFlags(unittest.TestCase): # VERIFY: sys.argv contains --enhance-local flag # (main.py should have added it before calling github_scraper) called_with_enhance = any( - "--enhance-local" in str(call) - for call in mock_github_main.call_args_list + "--enhance-local" in str(call) for call in mock_github_main.call_args_list ) self.assertTrue( called_with_enhance or "--enhance-local" in sys.argv, @@ -229,9 +224,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): os.environ, {"ANTHROPIC_API_KEY": "test-key-123", "ANTHROPIC_BASE_URL": custom_url}, ), - patch( - "skill_seekers.cli.enhance_skill.anthropic.Anthropic" - ) as mock_anthropic, + patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic, ): # Create enhancer _enhancer = SkillEnhancer(self.skill_dir) @@ -258,9 +251,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): # Use ANTHROPIC_AUTH_TOKEN instead of ANTHROPIC_API_KEY with ( patch.dict(os.environ, {"ANTHROPIC_AUTH_TOKEN": custom_token}, clear=True), - patch( - "skill_seekers.cli.enhance_skill.anthropic.Anthropic" - ) as mock_anthropic, + patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic, ): # Create enhancer (should accept ANTHROPIC_AUTH_TOKEN) enhancer = SkillEnhancer(self.skill_dir) @@ -290,9 +281,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): with ( patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}), - patch( - "skill_seekers.cli.enhance_skill.anthropic.Anthropic" - ) as mock_anthropic, + patch("skill_seekers.cli.enhance_skill.anthropic.Anthropic") as mock_anthropic, ): enhancer = SkillEnhancer(self.skill_dir) @@ -301,9 +290,7 @@ class TestIssue219Problem3CustomAPIEndpoints(unittest.TestCase): mock_thinking_block = SimpleNamespace(type="thinking") # TextBlock has .text attribute - mock_text_block = SimpleNamespace( - text="# Enhanced SKILL.md\n\nContent here" - ) + mock_text_block = SimpleNamespace(text="# Enhanced SKILL.md\n\nContent here") mock_message = Mock() mock_message.content = [mock_thinking_block, mock_text_block] diff --git a/tests/test_llms_txt_downloader.py b/tests/test_llms_txt_downloader.py index a6a8042..eb607a5 100644 --- a/tests/test_llms_txt_downloader.py +++ b/tests/test_llms_txt_downloader.py @@ -31,9 +31,7 @@ def test_timeout_with_retry(): downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=2) with ( - patch( - "requests.get", side_effect=requests.Timeout("Connection timeout") - ) as mock_get, + patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get, patch("time.sleep") as mock_sleep, ): # Mock sleep to speed up test content = downloader.download() @@ -143,9 +141,7 @@ def test_custom_max_retries(): downloader = LlmsTxtDownloader("https://example.com/llms.txt", max_retries=5) with ( - patch( - "requests.get", side_effect=requests.Timeout("Connection timeout") - ) as mock_get, + patch("requests.get", side_effect=requests.Timeout("Connection timeout")) as mock_get, patch("time.sleep"), ): content = downloader.download() @@ -203,7 +199,9 @@ def test_is_markdown_rejects_html_doctype(): """Test that HTML with DOCTYPE is rejected (prevents redirect trap)""" downloader = LlmsTxtDownloader("https://example.com/llms.txt") - html = "Product PageContent" + html = ( + "Product PageContent" + ) assert not downloader._is_markdown(html) # Test case-insensitive @@ -230,9 +228,7 @@ def test_is_markdown_rejects_html_meta(): html_with_head = "PageContent" assert not downloader._is_markdown(html_with_head) - html_with_meta = ( - '' - ) + html_with_meta = '' assert not downloader._is_markdown(html_with_meta) @@ -244,9 +240,7 @@ def test_is_markdown_accepts_markdown_with_html_words(): assert downloader._is_markdown(markdown) # Test with actual markdown patterns - markdown_with_code = ( - "# HTML Tutorial\n\n```html\n
example
\n```\n\n## More content" - ) + markdown_with_code = "# HTML Tutorial\n\n```html\n
example
\n```\n\n## More content" assert downloader._is_markdown(markdown_with_code) @@ -255,9 +249,7 @@ def test_html_detection_only_scans_first_500_chars(): downloader = LlmsTxtDownloader("https://example.com/llms.txt") # HTML tag after 500 chars should not be detected - safe_markdown = ( - "# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n" - ) + safe_markdown = "# Header\n\n" + ("Valid markdown content. " * 50) + "\n\n" # This should pass because is beyond first 500 chars if len(safe_markdown[:500]) < len(""): # If the HTML is within 500 chars, adjust test @@ -294,9 +286,7 @@ def test_download_rejects_html_redirect(): mock_response = Mock() # Simulate server returning HTML instead of markdown - mock_response.text = ( - "

Product Page

" - ) + mock_response.text = "

Product Page

" mock_response.raise_for_status = Mock() with patch("requests.get", return_value=mock_response): diff --git a/tests/test_skip_llms_txt.py b/tests/test_skip_llms_txt.py index 4ba23b0..c7fe27d 100644 --- a/tests/test_skip_llms_txt.py +++ b/tests/test_skip_llms_txt.py @@ -73,9 +73,7 @@ class TestSkipLlmsTxtSyncBehavior(unittest.TestCase): converter = DocToSkillConverter(config, dry_run=False) with ( - patch.object( - converter, "_try_llms_txt", return_value=False - ) as mock_try, + patch.object(converter, "_try_llms_txt", return_value=False) as mock_try, patch.object(converter, "scrape_page"), patch.object(converter, "save_summary"), ): @@ -154,9 +152,7 @@ class TestSkipLlmsTxtAsyncBehavior(unittest.TestCase): converter = DocToSkillConverter(config, dry_run=False) with ( - patch.object( - converter, "_try_llms_txt", return_value=False - ) as mock_try, + patch.object(converter, "_try_llms_txt", return_value=False) as mock_try, patch.object(converter, "scrape_page_async", return_value=None), patch.object(converter, "save_summary"), ): @@ -252,9 +248,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: converter = DocToSkillConverter(config, dry_run=True) self.assertFalse(converter.skip_llms_txt) - self.assertTrue( - any("Invalid value" in log and "0" in log for log in cm.output) - ) + self.assertTrue(any("Invalid value" in log and "0" in log for log in cm.output)) def test_skip_llms_txt_with_int_one_logs_warning(self): """Test that integer 1 logs warning and defaults to False.""" @@ -268,9 +262,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: converter = DocToSkillConverter(config, dry_run=True) self.assertFalse(converter.skip_llms_txt) - self.assertTrue( - any("Invalid value" in log and "1" in log for log in cm.output) - ) + self.assertTrue(any("Invalid value" in log and "1" in log for log in cm.output)) def test_skip_llms_txt_with_string_logs_warning(self): """Test that string values log warning and default to False.""" @@ -284,9 +276,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: converter = DocToSkillConverter(config, dry_run=True) self.assertFalse(converter.skip_llms_txt) - self.assertTrue( - any("Invalid value" in log and "true" in log for log in cm.output) - ) + self.assertTrue(any("Invalid value" in log and "true" in log for log in cm.output)) def test_skip_llms_txt_with_none_logs_warning(self): """Test that None logs warning and defaults to False.""" @@ -300,9 +290,7 @@ class TestSkipLlmsTxtEdgeCases(unittest.TestCase): with self.assertLogs("skill_seekers.cli.doc_scraper", level="WARNING") as cm: converter = DocToSkillConverter(config, dry_run=True) self.assertFalse(converter.skip_llms_txt) - self.assertTrue( - any("Invalid value" in log and "None" in log for log in cm.output) - ) + self.assertTrue(any("Invalid value" in log and "None" in log for log in cm.output)) def test_scraping_proceeds_when_llms_txt_skipped(self): """Test that HTML scraping proceeds normally when llms.txt is skipped."""