style: Format code with ruff

- Format 5 files affected by PDF scraper changes
- Ensure CI/CD code quality checks pass

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
yusyus
2026-01-27 21:08:05 +03:00
parent 3fc4b54164
commit 8f720670f2
5 changed files with 33 additions and 16 deletions

View File

@@ -1921,7 +1921,9 @@ def setup_argument_parser() -> argparse.ArgumentParser:
help="Load configuration from file (e.g., configs/godot.json)",
)
parser.add_argument("--name", type=str, help="Skill name")
parser.add_argument("--url", type=str, help="Base documentation URL (alternative to positional URL)")
parser.add_argument(
"--url", type=str, help="Base documentation URL (alternative to positional URL)"
)
parser.add_argument("--description", "-d", type=str, help="Skill description")
parser.add_argument(
"--max-pages",
@@ -2028,7 +2030,7 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
"""
# Handle URL from either positional argument or --url flag
# Positional 'url' takes precedence, then --url flag
effective_url = getattr(args, 'url', None)
effective_url = getattr(args, "url", None)
# Get base configuration
if args.config:
@@ -2095,9 +2097,7 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
logger.warning(
"⚠️ --max-pages=%d is very high - scraping may take hours", args.max_pages
)
logger.warning(
" Recommendation: Use configs with reasonable limits for production"
)
logger.warning(" Recommendation: Use configs with reasonable limits for production")
elif args.max_pages < 10:
logger.warning(
"⚠️ --max-pages=%d is very low - may result in incomplete skill", args.max_pages

View File

@@ -101,7 +101,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
scrape_parser.add_argument("--config", help="Config JSON file")
scrape_parser.add_argument("--name", help="Skill name")
scrape_parser.add_argument("--description", help="Skill description")
scrape_parser.add_argument("--max-pages", type=int, dest="max_pages", help="Maximum pages to scrape (override config)")
scrape_parser.add_argument(
"--max-pages", type=int, dest="max_pages", help="Maximum pages to scrape (override config)"
)
scrape_parser.add_argument(
"--skip-scrape", action="store_true", help="Skip scraping, use cached data"
)
@@ -157,7 +159,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
)
unified_parser.add_argument("--config", required=True, help="Unified config JSON file")
unified_parser.add_argument("--merge-mode", help="Merge mode (rule-based, claude-enhanced)")
unified_parser.add_argument("--fresh", action="store_true", help="Clear existing data and start fresh")
unified_parser.add_argument(
"--fresh", action="store_true", help="Clear existing data and start fresh"
)
unified_parser.add_argument("--dry-run", action="store_true", help="Dry run mode")
# === enhance subcommand ===
@@ -343,7 +347,7 @@ def main(argv: list[str] | None = None) -> int:
# Convert args namespace to sys.argv format for doc_scraper
sys.argv = ["doc_scraper.py"]
# Add positional URL if provided (positional arg has priority)
if hasattr(args, 'url') and args.url:
if hasattr(args, "url") and args.url:
sys.argv.append(args.url)
if args.config:
sys.argv.extend(["--config", args.config])
@@ -351,7 +355,7 @@ def main(argv: list[str] | None = None) -> int:
sys.argv.extend(["--name", args.name])
if args.description:
sys.argv.extend(["--description", args.description])
if hasattr(args, 'max_pages') and args.max_pages:
if hasattr(args, "max_pages") and args.max_pages:
sys.argv.extend(["--max-pages", str(args.max_pages)])
if args.skip_scrape:
sys.argv.append("--skip-scrape")
@@ -548,7 +552,8 @@ def main(argv: list[str] | None = None) -> int:
# Show traceback in verbose mode (if -v flag exists in args)
import traceback
if hasattr(args, 'verbose') and getattr(args, 'verbose', False):
if hasattr(args, "verbose") and getattr(args, "verbose", False):
traceback.print_exc()
return 1

View File

@@ -794,7 +794,12 @@ class PDFExtractor:
markdown = page.get_text("markdown")
except (AssertionError, ValueError):
# Fallback to text format for older/newer PyMuPDF versions
markdown = page.get_text("text", flags=fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_SPANS)
markdown = page.get_text(
"text",
flags=fitz.TEXT_PRESERVE_WHITESPACE
| fitz.TEXT_PRESERVE_LIGATURES
| fitz.TEXT_PRESERVE_SPANS,
)
# Extract tables (Priority 2)
tables = self.extract_tables_from_page(page)

View File

@@ -141,7 +141,7 @@ class PDFToSkillConverter:
categorized[category_key] = {
"title": pdf_basename,
"pages": self.extracted_data.get("pages", [])
"pages": self.extracted_data.get("pages", []),
}
print("✅ Created 1 category (single PDF source)")
@@ -176,7 +176,7 @@ class PDFToSkillConverter:
if uncategorized_pages:
categorized["uncategorized"] = {
"title": "Additional Content",
"pages": uncategorized_pages
"pages": uncategorized_pages,
}
# Fall back to keyword-based categorization
@@ -282,7 +282,11 @@ class PDFToSkillConverter:
# If only one section or section covers most pages, use simple name
if total_sections == 1:
filename = f"{self.skill_dir}/references/{pdf_basename}.md" if pdf_basename else f"{self.skill_dir}/references/main.md"
filename = (
f"{self.skill_dir}/references/{pdf_basename}.md"
if pdf_basename
else f"{self.skill_dir}/references/main.md"
)
else:
# Multiple sections: use PDF basename + page range
base_name = pdf_basename if pdf_basename else "section"
@@ -376,7 +380,9 @@ class PDFToSkillConverter:
link_filename = f"section_{section_num:02d}.md"
page_range_str = "N/A"
f.write(f"- [{cat_data['title']}]({link_filename}) ({page_count} pages, {page_range_str})\n")
f.write(
f"- [{cat_data['title']}]({link_filename}) ({page_count} pages, {page_range_str})\n"
)
section_num += 1
f.write("\n## Statistics\n\n")
@@ -693,6 +699,7 @@ def main():
except Exception as e:
print(f"\n❌ Unexpected error during PDF processing: {e}", file=sys.stderr)
import traceback
traceback.print_exc()
sys.exit(1)

2
uv.lock generated
View File

@@ -1846,7 +1846,7 @@ wheels = [
[[package]]
name = "skill-seekers"
version = "2.8.0.dev0"
version = "2.7.4"
source = { editable = "." }
dependencies = [
{ name = "anthropic" },