From 8f720670f2d71e40094e4fcb938774aebcd12494 Mon Sep 17 00:00:00 2001 From: yusyus Date: Tue, 27 Jan 2026 21:08:05 +0300 Subject: [PATCH] style: Format code with ruff - Format 5 files affected by PDF scraper changes - Ensures CI/CD code quality checks pass Co-Authored-By: Claude Sonnet 4.5 --- src/skill_seekers/cli/doc_scraper.py | 10 +++++----- src/skill_seekers/cli/main.py | 15 ++++++++++----- src/skill_seekers/cli/pdf_extractor_poc.py | 7 ++++++- src/skill_seekers/cli/pdf_scraper.py | 15 +++++++++++---- uv.lock | 2 +- 5 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/skill_seekers/cli/doc_scraper.py b/src/skill_seekers/cli/doc_scraper.py index 3ac94a0..b741906 100755 --- a/src/skill_seekers/cli/doc_scraper.py +++ b/src/skill_seekers/cli/doc_scraper.py @@ -1921,7 +1921,9 @@ def setup_argument_parser() -> argparse.ArgumentParser: help="Load configuration from file (e.g., configs/godot.json)", ) parser.add_argument("--name", type=str, help="Skill name") - parser.add_argument("--url", type=str, help="Base documentation URL (alternative to positional URL)") + parser.add_argument( + "--url", type=str, help="Base documentation URL (alternative to positional URL)" + ) parser.add_argument("--description", "-d", type=str, help="Skill description") parser.add_argument( "--max-pages", @@ -2028,7 +2030,7 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]: """ # Handle URL from either positional argument or --url flag # Positional 'url' takes precedence, then --url flag - effective_url = getattr(args, 'url', None) + effective_url = getattr(args, "url", None) # Get base configuration if args.config: @@ -2095,9 +2097,7 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]: logger.warning( "⚠️ --max-pages=%d is very high - scraping may take hours", args.max_pages ) - logger.warning( - " Recommendation: Use configs with reasonable limits for production" - ) + logger.warning(" Recommendation: Use configs with reasonable limits for production") elif args.max_pages < 10: logger.warning( "⚠️ --max-pages=%d is very low - may result in incomplete skill", args.max_pages diff --git a/src/skill_seekers/cli/main.py b/src/skill_seekers/cli/main.py index cc5d64b..d1cf9d8 100644 --- a/src/skill_seekers/cli/main.py +++ b/src/skill_seekers/cli/main.py @@ -101,7 +101,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers scrape_parser.add_argument("--config", help="Config JSON file") scrape_parser.add_argument("--name", help="Skill name") scrape_parser.add_argument("--description", help="Skill description") - scrape_parser.add_argument("--max-pages", type=int, dest="max_pages", help="Maximum pages to scrape (override config)") + scrape_parser.add_argument( + "--max-pages", type=int, dest="max_pages", help="Maximum pages to scrape (override config)" + ) scrape_parser.add_argument( "--skip-scrape", action="store_true", help="Skip scraping, use cached data" ) @@ -157,7 +159,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers ) unified_parser.add_argument("--config", required=True, help="Unified config JSON file") unified_parser.add_argument("--merge-mode", help="Merge mode (rule-based, claude-enhanced)") - unified_parser.add_argument("--fresh", action="store_true", help="Clear existing data and start fresh") + unified_parser.add_argument( + "--fresh", action="store_true", help="Clear existing data and start fresh" + ) unified_parser.add_argument("--dry-run", action="store_true", help="Dry run mode") # === enhance subcommand === @@ -343,7 +347,7 @@ def main(argv: list[str] | None = None) -> int: # Convert args namespace to sys.argv format for doc_scraper sys.argv = ["doc_scraper.py"] # Add positional URL if provided (positional arg has priority) - if hasattr(args, 'url') and args.url: + if hasattr(args, "url") and args.url: sys.argv.append(args.url) if args.config: sys.argv.extend(["--config", args.config]) @@ -351,7 +355,7 @@ def main(argv: list[str] | None = None) -> int: sys.argv.extend(["--name", args.name]) if args.description: sys.argv.extend(["--description", args.description]) - if hasattr(args, 'max_pages') and args.max_pages: + if hasattr(args, "max_pages") and args.max_pages: sys.argv.extend(["--max-pages", str(args.max_pages)]) if args.skip_scrape: sys.argv.append("--skip-scrape") @@ -548,7 +552,8 @@ def main(argv: list[str] | None = None) -> int: # Show traceback in verbose mode (if -v flag exists in args) import traceback - if hasattr(args, 'verbose') and getattr(args, 'verbose', False): + + if hasattr(args, "verbose") and getattr(args, "verbose", False): traceback.print_exc() return 1 diff --git a/src/skill_seekers/cli/pdf_extractor_poc.py b/src/skill_seekers/cli/pdf_extractor_poc.py index 56adf56..9914c24 100755 --- a/src/skill_seekers/cli/pdf_extractor_poc.py +++ b/src/skill_seekers/cli/pdf_extractor_poc.py @@ -794,7 +794,12 @@ class PDFExtractor: markdown = page.get_text("markdown") except (AssertionError, ValueError): # Fallback to text format for older/newer PyMuDF versions - markdown = page.get_text("text", flags=fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_SPANS) + markdown = page.get_text( + "text", + flags=fitz.TEXT_PRESERVE_WHITESPACE + | fitz.TEXT_PRESERVE_LIGATURES + | fitz.TEXT_PRESERVE_SPANS, + ) # Extract tables (Priority 2) tables = self.extract_tables_from_page(page) diff --git a/src/skill_seekers/cli/pdf_scraper.py b/src/skill_seekers/cli/pdf_scraper.py index 265aec3..6096124 100644 --- a/src/skill_seekers/cli/pdf_scraper.py +++ b/src/skill_seekers/cli/pdf_scraper.py @@ -141,7 +141,7 @@ class PDFToSkillConverter: categorized[category_key] = { "title": pdf_basename, - "pages": self.extracted_data.get("pages", []) + "pages": self.extracted_data.get("pages", []), } print("✅ Created 1 category (single PDF source)") @@ -176,7 +176,7 @@ class PDFToSkillConverter: if uncategorized_pages: categorized["uncategorized"] = { "title": "Additional Content", - "pages": uncategorized_pages + "pages": uncategorized_pages, } # Fall back to keyword-based categorization @@ -282,7 +282,11 @@ class PDFToSkillConverter: # If only one section or section covers most pages, use simple name if total_sections == 1: - filename = f"{self.skill_dir}/references/{pdf_basename}.md" if pdf_basename else f"{self.skill_dir}/references/main.md" + filename = ( + f"{self.skill_dir}/references/{pdf_basename}.md" + if pdf_basename + else f"{self.skill_dir}/references/main.md" + ) else: # Multiple sections: use PDF basename + page range base_name = pdf_basename if pdf_basename else "section" @@ -376,7 +380,9 @@ class PDFToSkillConverter: link_filename = f"section_{section_num:02d}.md" page_range_str = "N/A" - f.write(f"- [{cat_data['title']}]({link_filename}) ({page_count} pages, {page_range_str})\n") + f.write( + f"- [{cat_data['title']}]({link_filename}) ({page_count} pages, {page_range_str})\n" + ) section_num += 1 f.write("\n## Statistics\n\n") @@ -693,6 +699,7 @@ def main(): except Exception as e: print(f"\n❌ Unexpected error during PDF processing: {e}", file=sys.stderr) import traceback + traceback.print_exc() sys.exit(1) diff --git a/uv.lock b/uv.lock index d16b605..682385f 100644 --- a/uv.lock +++ b/uv.lock @@ -1846,7 +1846,7 @@ wheels = [ [[package]] name = "skill-seekers" -version = "2.8.0.dev0" +version = "2.7.4" source = { editable = "." } dependencies = [ { name = "anthropic" },