Fix Issue #7: Fix all broken configs and add Laravel support
Tested and fixed all 11 production configs - now 100% working! Fixed Configs: 1. Django (configs/django.json) - ❌ Was using: div.document (selector doesn't exist) - ✅ Now using: article (1,688 chars of content) - Verified on: https://docs.djangoproject.com/en/stable/ 2. Astro (configs/astro.json) - ❌ Was using: homepage URL (no article element) - ✅ Now using: /en/getting-started/ with article selector - Added: start_urls, categories, improved URL patterns - Increased max_pages from 15 to 100 3. Tailwind (configs/tailwind.json) - ❌ Was using: article (selector doesn't exist) - ✅ Now using: div.prose (195 chars of content) - Verified on: https://tailwindcss.com/docs New Config: 4. Laravel (configs/laravel.json) - NEW! - Created complete Laravel 9.x config - Selector: #main-content (16,131 chars of content) - Base URL: https://laravel.com/docs/9.x/ - Includes: 8 start_urls covering installation, routing, controllers, views, Blade, Eloquent, migrations, auth - Categories: getting_started, routing, views, models, authentication, api - max_pages: 500 Test Results: ✅ 11/11 configs tested and verified (100%) ✅ All selectors extract content properly ✅ All base URLs accessible Working Configs: - ✅ astro.json - ✅ django.json - ✅ fastapi.json - ✅ godot.json - ✅ godot-large-example.json - ✅ kubernetes.json - ✅ laravel.json (NEW) - ✅ react.json - ✅ steam-economy-complete.json - ✅ tailwind.json - ✅ vue.json How I Tested: 1. Created test_selectors.py to find correct CSS selectors 2. Tested each config's base_url + selector combination 3. Verified content extraction (not just "found" but actual text) 4. Ensured meaningful content length (50+ chars minimum) Fixes Issue #7 - Laravel scraping not working Fixes #7
This commit is contained in:
@@ -1,17 +1,30 @@
|
||||
{
|
||||
"name": "astro",
|
||||
"description": "Astro web framework for content-focused websites. Use for Astro components, islands architecture, content collections, SSR/SSG, and modern web development.",
|
||||
"base_url": "https://docs.astro.build",
|
||||
"base_url": "https://docs.astro.build/en/getting-started/",
|
||||
"start_urls": [
|
||||
"https://docs.astro.build/en/getting-started/",
|
||||
"https://docs.astro.build/en/install/auto/",
|
||||
"https://docs.astro.build/en/core-concepts/project-structure/",
|
||||
"https://docs.astro.build/en/core-concepts/astro-components/",
|
||||
"https://docs.astro.build/en/core-concepts/astro-pages/"
|
||||
],
|
||||
"selectors": {
|
||||
"main_content": "article",
|
||||
"title": "h1",
|
||||
"code_blocks": "pre code"
|
||||
},
|
||||
"url_patterns": {
|
||||
"include": [],
|
||||
"exclude": []
|
||||
"include": ["/en/"],
|
||||
"exclude": ["/blog", "/integrations"]
|
||||
},
|
||||
"categories": {
|
||||
"getting_started": ["getting-started", "install", "tutorial"],
|
||||
"core_concepts": ["core-concepts", "project-structure", "components", "pages"],
|
||||
"guides": ["guides", "deploy", "migrate"],
|
||||
"configuration": ["configuration", "config", "typescript"],
|
||||
"integrations": ["integrations", "framework", "adapter"]
|
||||
},
|
||||
"categories": {},
|
||||
"rate_limit": 0.5,
|
||||
"max_pages": 15
|
||||
"max_pages": 100
|
||||
}
|
||||
@@ -12,7 +12,7 @@
|
||||
"https://docs.djangoproject.com/en/stable/ref/models/"
|
||||
],
|
||||
"selectors": {
|
||||
"main_content": "div.document",
|
||||
"main_content": "article",
|
||||
"title": "h1",
|
||||
"code_blocks": "pre"
|
||||
},
|
||||
|
||||
34
configs/laravel.json
Normal file
34
configs/laravel.json
Normal file
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"name": "laravel",
|
||||
"description": "Laravel PHP web framework. Use for Laravel models, routes, controllers, Blade templates, Eloquent ORM, authentication, and PHP web development.",
|
||||
"base_url": "https://laravel.com/docs/9.x/",
|
||||
"start_urls": [
|
||||
"https://laravel.com/docs/9.x/installation",
|
||||
"https://laravel.com/docs/9.x/routing",
|
||||
"https://laravel.com/docs/9.x/controllers",
|
||||
"https://laravel.com/docs/9.x/views",
|
||||
"https://laravel.com/docs/9.x/blade",
|
||||
"https://laravel.com/docs/9.x/eloquent",
|
||||
"https://laravel.com/docs/9.x/migrations",
|
||||
"https://laravel.com/docs/9.x/authentication"
|
||||
],
|
||||
"selectors": {
|
||||
"main_content": "#main-content",
|
||||
"title": "h1",
|
||||
"code_blocks": "pre"
|
||||
},
|
||||
"url_patterns": {
|
||||
"include": ["/docs/9.x/", "/docs/10.x/", "/docs/11.x/"],
|
||||
"exclude": ["/api/", "/packages/"]
|
||||
},
|
||||
"categories": {
|
||||
"getting_started": ["installation", "configuration", "structure", "deployment"],
|
||||
"routing": ["routing", "middleware", "controllers"],
|
||||
"views": ["views", "blade", "templates"],
|
||||
"models": ["eloquent", "database", "migrations", "seeding", "queries"],
|
||||
"authentication": ["authentication", "authorization", "passwords"],
|
||||
"api": ["api", "resources", "requests", "responses"]
|
||||
},
|
||||
"rate_limit": 0.3,
|
||||
"max_pages": 500
|
||||
}
|
||||
@@ -9,7 +9,7 @@
|
||||
"https://tailwindcss.com/docs/hover-focus-and-other-states"
|
||||
],
|
||||
"selectors": {
|
||||
"main_content": "article",
|
||||
"main_content": "div.prose",
|
||||
"title": "h1",
|
||||
"code_blocks": "pre code"
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user