All configs now follow the steam-economy-complete.json pattern with:
- Multiple start_urls for comprehensive entry points
- Improved include patterns for better targeting
- Enhanced exclude patterns to skip irrelevant pages

Godot Config:
- Added 7 start_urls covering getting started, scripting, 2D, 3D, physics, animation, and classes
- Added include patterns: /getting_started/, /tutorials/, /classes/
- More focused scraping of core documentation

React Config:
- Added 6 start_urls covering learn, quick-start, reference, and hooks
- Existing patterns maintained (already well-optimized)

Vue Config:
- Added 6 start_urls covering introduction, essentials, components, composables, and API
- Fixed base_url from https://vuejs.org/guide/ to https://vuejs.org/
- Added /partners/ to exclude list

Django Config:
- Added 7 start_urls covering intro, models, views, templates, forms, auth, and reference
- Added /intro/ to include patterns
- Added /releases/ to exclude list (changelog not needed)

FastAPI Config:
- Added 7 start_urls covering tutorial, first-steps, path-params, body, dependencies, advanced, and reference
- Added /deployment/ to exclude list

Benefits:
- Better initial page discovery
- More comprehensive documentation coverage
- Faster scraping (direct entry to important sections)
- Reduced unnecessary page crawling
- Consistent pattern across all configs

All configs tested and validated:
✅ 71/71 tests passing
✅ All 6 configs validated successfully

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
35 lines
1.3 KiB
JSON
35 lines
1.3 KiB
JSON
{
  "name": "django",
  "description": "Django web framework for Python. Use for Django models, views, templates, ORM, authentication, and web development.",
  "base_url": "https://docs.djangoproject.com/en/stable/",
  "start_urls": [
    "https://docs.djangoproject.com/en/stable/intro/",
    "https://docs.djangoproject.com/en/stable/topics/db/models/",
    "https://docs.djangoproject.com/en/stable/topics/http/views/",
    "https://docs.djangoproject.com/en/stable/topics/templates/",
    "https://docs.djangoproject.com/en/stable/topics/forms/",
    "https://docs.djangoproject.com/en/stable/topics/auth/",
    "https://docs.djangoproject.com/en/stable/ref/models/"
  ],
  "selectors": {
    "main_content": "div.document",
    "title": "h1",
    "code_blocks": "pre"
  },
  "url_patterns": {
    "include": ["/intro/", "/topics/", "/ref/", "/howto/"],
    "exclude": ["/faq/", "/misc/", "/releases/"]
  },
  "categories": {
    "getting_started": ["intro", "tutorial", "install"],
    "models": ["models", "database", "orm", "queries"],
    "views": ["views", "urlconf", "routing"],
    "templates": ["templates", "template"],
    "forms": ["forms", "form"],
    "authentication": ["auth", "authentication", "user"],
    "api": ["ref", "reference"]
  },
  "rate_limit": 0.3,
  "max_pages": 500
}