style: Format code with ruff
- Format 5 files affected by PDF scraper changes
- Ensure CI/CD code quality checks pass

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1921,7 +1921,9 @@ def setup_argument_parser() -> argparse.ArgumentParser:
|
|||||||
help="Load configuration from file (e.g., configs/godot.json)",
|
help="Load configuration from file (e.g., configs/godot.json)",
|
||||||
)
|
)
|
||||||
parser.add_argument("--name", type=str, help="Skill name")
|
parser.add_argument("--name", type=str, help="Skill name")
|
||||||
parser.add_argument("--url", type=str, help="Base documentation URL (alternative to positional URL)")
|
parser.add_argument(
|
||||||
|
"--url", type=str, help="Base documentation URL (alternative to positional URL)"
|
||||||
|
)
|
||||||
parser.add_argument("--description", "-d", type=str, help="Skill description")
|
parser.add_argument("--description", "-d", type=str, help="Skill description")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--max-pages",
|
"--max-pages",
|
||||||
@@ -2028,7 +2030,7 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
"""
|
"""
|
||||||
# Handle URL from either positional argument or --url flag
|
# Handle URL from either positional argument or --url flag
|
||||||
# Positional 'url' takes precedence, then --url flag
|
# Positional 'url' takes precedence, then --url flag
|
||||||
effective_url = getattr(args, 'url', None)
|
effective_url = getattr(args, "url", None)
|
||||||
|
|
||||||
# Get base configuration
|
# Get base configuration
|
||||||
if args.config:
|
if args.config:
|
||||||
@@ -2095,9 +2097,7 @@ def get_configuration(args: argparse.Namespace) -> dict[str, Any]:
|
|||||||
logger.warning(
|
logger.warning(
|
||||||
"⚠️ --max-pages=%d is very high - scraping may take hours", args.max_pages
|
"⚠️ --max-pages=%d is very high - scraping may take hours", args.max_pages
|
||||||
)
|
)
|
||||||
logger.warning(
|
logger.warning(" Recommendation: Use configs with reasonable limits for production")
|
||||||
" Recommendation: Use configs with reasonable limits for production"
|
|
||||||
)
|
|
||||||
elif args.max_pages < 10:
|
elif args.max_pages < 10:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"⚠️ --max-pages=%d is very low - may result in incomplete skill", args.max_pages
|
"⚠️ --max-pages=%d is very low - may result in incomplete skill", args.max_pages
|
||||||
|
|||||||
@@ -101,7 +101,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
|||||||
scrape_parser.add_argument("--config", help="Config JSON file")
|
scrape_parser.add_argument("--config", help="Config JSON file")
|
||||||
scrape_parser.add_argument("--name", help="Skill name")
|
scrape_parser.add_argument("--name", help="Skill name")
|
||||||
scrape_parser.add_argument("--description", help="Skill description")
|
scrape_parser.add_argument("--description", help="Skill description")
|
||||||
scrape_parser.add_argument("--max-pages", type=int, dest="max_pages", help="Maximum pages to scrape (override config)")
|
scrape_parser.add_argument(
|
||||||
|
"--max-pages", type=int, dest="max_pages", help="Maximum pages to scrape (override config)"
|
||||||
|
)
|
||||||
scrape_parser.add_argument(
|
scrape_parser.add_argument(
|
||||||
"--skip-scrape", action="store_true", help="Skip scraping, use cached data"
|
"--skip-scrape", action="store_true", help="Skip scraping, use cached data"
|
||||||
)
|
)
|
||||||
@@ -157,7 +159,9 @@ For more information: https://github.com/yusufkaraaslan/Skill_Seekers
|
|||||||
)
|
)
|
||||||
unified_parser.add_argument("--config", required=True, help="Unified config JSON file")
|
unified_parser.add_argument("--config", required=True, help="Unified config JSON file")
|
||||||
unified_parser.add_argument("--merge-mode", help="Merge mode (rule-based, claude-enhanced)")
|
unified_parser.add_argument("--merge-mode", help="Merge mode (rule-based, claude-enhanced)")
|
||||||
unified_parser.add_argument("--fresh", action="store_true", help="Clear existing data and start fresh")
|
unified_parser.add_argument(
|
||||||
|
"--fresh", action="store_true", help="Clear existing data and start fresh"
|
||||||
|
)
|
||||||
unified_parser.add_argument("--dry-run", action="store_true", help="Dry run mode")
|
unified_parser.add_argument("--dry-run", action="store_true", help="Dry run mode")
|
||||||
|
|
||||||
# === enhance subcommand ===
|
# === enhance subcommand ===
|
||||||
@@ -343,7 +347,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
# Convert args namespace to sys.argv format for doc_scraper
|
# Convert args namespace to sys.argv format for doc_scraper
|
||||||
sys.argv = ["doc_scraper.py"]
|
sys.argv = ["doc_scraper.py"]
|
||||||
# Add positional URL if provided (positional arg has priority)
|
# Add positional URL if provided (positional arg has priority)
|
||||||
if hasattr(args, 'url') and args.url:
|
if hasattr(args, "url") and args.url:
|
||||||
sys.argv.append(args.url)
|
sys.argv.append(args.url)
|
||||||
if args.config:
|
if args.config:
|
||||||
sys.argv.extend(["--config", args.config])
|
sys.argv.extend(["--config", args.config])
|
||||||
@@ -351,7 +355,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
sys.argv.extend(["--name", args.name])
|
sys.argv.extend(["--name", args.name])
|
||||||
if args.description:
|
if args.description:
|
||||||
sys.argv.extend(["--description", args.description])
|
sys.argv.extend(["--description", args.description])
|
||||||
if hasattr(args, 'max_pages') and args.max_pages:
|
if hasattr(args, "max_pages") and args.max_pages:
|
||||||
sys.argv.extend(["--max-pages", str(args.max_pages)])
|
sys.argv.extend(["--max-pages", str(args.max_pages)])
|
||||||
if args.skip_scrape:
|
if args.skip_scrape:
|
||||||
sys.argv.append("--skip-scrape")
|
sys.argv.append("--skip-scrape")
|
||||||
@@ -548,7 +552,8 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
|
|
||||||
# Show traceback in verbose mode (if -v flag exists in args)
|
# Show traceback in verbose mode (if -v flag exists in args)
|
||||||
import traceback
|
import traceback
|
||||||
if hasattr(args, 'verbose') and getattr(args, 'verbose', False):
|
|
||||||
|
if hasattr(args, "verbose") and getattr(args, "verbose", False):
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
return 1
|
return 1
|
||||||
|
|||||||
@@ -794,7 +794,12 @@ class PDFExtractor:
|
|||||||
markdown = page.get_text("markdown")
|
markdown = page.get_text("markdown")
|
||||||
except (AssertionError, ValueError):
|
except (AssertionError, ValueError):
|
||||||
# Fallback to text format for older/newer PyMuPDF versions
|
# Fallback to text format for older/newer PyMuPDF versions
|
||||||
markdown = page.get_text("text", flags=fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_SPANS)
|
markdown = page.get_text(
|
||||||
|
"text",
|
||||||
|
flags=fitz.TEXT_PRESERVE_WHITESPACE
|
||||||
|
| fitz.TEXT_PRESERVE_LIGATURES
|
||||||
|
| fitz.TEXT_PRESERVE_SPANS,
|
||||||
|
)
|
||||||
|
|
||||||
# Extract tables (Priority 2)
|
# Extract tables (Priority 2)
|
||||||
tables = self.extract_tables_from_page(page)
|
tables = self.extract_tables_from_page(page)
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ class PDFToSkillConverter:
|
|||||||
|
|
||||||
categorized[category_key] = {
|
categorized[category_key] = {
|
||||||
"title": pdf_basename,
|
"title": pdf_basename,
|
||||||
"pages": self.extracted_data.get("pages", [])
|
"pages": self.extracted_data.get("pages", []),
|
||||||
}
|
}
|
||||||
|
|
||||||
print("✅ Created 1 category (single PDF source)")
|
print("✅ Created 1 category (single PDF source)")
|
||||||
@@ -176,7 +176,7 @@ class PDFToSkillConverter:
|
|||||||
if uncategorized_pages:
|
if uncategorized_pages:
|
||||||
categorized["uncategorized"] = {
|
categorized["uncategorized"] = {
|
||||||
"title": "Additional Content",
|
"title": "Additional Content",
|
||||||
"pages": uncategorized_pages
|
"pages": uncategorized_pages,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Fall back to keyword-based categorization
|
# Fall back to keyword-based categorization
|
||||||
@@ -282,7 +282,11 @@ class PDFToSkillConverter:
|
|||||||
|
|
||||||
# If only one section or section covers most pages, use simple name
|
# If only one section or section covers most pages, use simple name
|
||||||
if total_sections == 1:
|
if total_sections == 1:
|
||||||
filename = f"{self.skill_dir}/references/{pdf_basename}.md" if pdf_basename else f"{self.skill_dir}/references/main.md"
|
filename = (
|
||||||
|
f"{self.skill_dir}/references/{pdf_basename}.md"
|
||||||
|
if pdf_basename
|
||||||
|
else f"{self.skill_dir}/references/main.md"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# Multiple sections: use PDF basename + page range
|
# Multiple sections: use PDF basename + page range
|
||||||
base_name = pdf_basename if pdf_basename else "section"
|
base_name = pdf_basename if pdf_basename else "section"
|
||||||
@@ -376,7 +380,9 @@ class PDFToSkillConverter:
|
|||||||
link_filename = f"section_{section_num:02d}.md"
|
link_filename = f"section_{section_num:02d}.md"
|
||||||
page_range_str = "N/A"
|
page_range_str = "N/A"
|
||||||
|
|
||||||
f.write(f"- [{cat_data['title']}]({link_filename}) ({page_count} pages, {page_range_str})\n")
|
f.write(
|
||||||
|
f"- [{cat_data['title']}]({link_filename}) ({page_count} pages, {page_range_str})\n"
|
||||||
|
)
|
||||||
section_num += 1
|
section_num += 1
|
||||||
|
|
||||||
f.write("\n## Statistics\n\n")
|
f.write("\n## Statistics\n\n")
|
||||||
@@ -693,6 +699,7 @@ def main():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"\n❌ Unexpected error during PDF processing: {e}", file=sys.stderr)
|
print(f"\n❌ Unexpected error during PDF processing: {e}", file=sys.stderr)
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|||||||
2
uv.lock
generated
2
uv.lock
generated
@@ -1846,7 +1846,7 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "skill-seekers"
|
name = "skill-seekers"
|
||||||
version = "2.8.0.dev0"
|
version = "2.7.4"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "anthropic" },
|
{ name = "anthropic" },
|
||||||
|
|||||||
Reference in New Issue
Block a user