Fix: include docs references in unified skill output (#213)
* Fix: include docs references in unified skill output
* Fix: quality checker counts nested reference files
* fix(unified): pass through llms_txt_url and skip_llms_txt to doc scraper
* configs: add svelte CLI unified preset (llms.txt + categories)

---------

Co-authored-by: Chris Engelhard <chris@chrisengelhard.nl>
This commit is contained in:
68
configs/svelte_cli_unified.json
Normal file
68
configs/svelte_cli_unified.json
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
{
|
||||||
|
"name": "svelte-cli",
|
||||||
|
"description": "Svelte CLI: docs (llms.txt) + GitHub repository (commands, project scaffolding, dev/build workflows).",
|
||||||
|
"merge_mode": "rule-based",
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"type": "documentation",
|
||||||
|
"base_url": "https://svelte.dev/docs/cli",
|
||||||
|
"llms_txt_url": "https://svelte.dev/docs/cli/llms.txt",
|
||||||
|
"extract_api": true,
|
||||||
|
"selectors": {
|
||||||
|
"main_content": "#main, main",
|
||||||
|
"title": "h1",
|
||||||
|
"code_blocks": "pre code, pre"
|
||||||
|
},
|
||||||
|
"url_patterns": {
|
||||||
|
"include": ["/docs/cli"],
|
||||||
|
"exclude": [
|
||||||
|
"/docs/kit",
|
||||||
|
"/docs/svelte",
|
||||||
|
"/docs/mcp",
|
||||||
|
"/tutorial",
|
||||||
|
"/packages",
|
||||||
|
"/playground",
|
||||||
|
"/blog"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"categories": {
|
||||||
|
"overview": ["overview"],
|
||||||
|
"faq": ["frequently asked questions"],
|
||||||
|
"sv_create": ["sv create"],
|
||||||
|
"sv_add": ["sv add"],
|
||||||
|
"sv_check": ["sv check"],
|
||||||
|
"sv_migrate": ["sv migrate"],
|
||||||
|
"devtools_json": ["devtools-json"],
|
||||||
|
"drizzle": ["drizzle"],
|
||||||
|
"eslint": ["eslint"],
|
||||||
|
"lucia": ["lucia"],
|
||||||
|
"mcp": ["mcp"],
|
||||||
|
"mdsvex": ["mdsvex"],
|
||||||
|
"paraglide": ["paraglide"],
|
||||||
|
"playwright": ["playwright"],
|
||||||
|
"prettier": ["prettier"],
|
||||||
|
"storybook": ["storybook"],
|
||||||
|
"sveltekit_adapter": ["sveltekit-adapter"],
|
||||||
|
"tailwindcss": ["tailwindcss"],
|
||||||
|
"vitest": ["vitest"]
|
||||||
|
},
|
||||||
|
"rate_limit": 0.5,
|
||||||
|
"max_pages": 200
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"repo": "sveltejs/cli",
|
||||||
|
"include_issues": true,
|
||||||
|
"max_issues": 150,
|
||||||
|
"include_changelog": true,
|
||||||
|
"include_releases": true,
|
||||||
|
"include_code": true,
|
||||||
|
"code_analysis_depth": "deep",
|
||||||
|
"file_patterns": [
|
||||||
|
"src/**/*.ts",
|
||||||
|
"src/**/*.js"
|
||||||
|
],
|
||||||
|
"local_repo_path": "local_paths/sveltekit/cli"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -146,7 +146,7 @@ class SkillQualityChecker:
|
|||||||
'references/ directory not found - skill may be incomplete',
|
'references/ directory not found - skill may be incomplete',
|
||||||
str(self.references_dir)
|
str(self.references_dir)
|
||||||
)
|
)
|
||||||
elif not list(self.references_dir.glob('*.md')):
|
elif not list(self.references_dir.rglob('*.md')):
|
||||||
self.report.add_warning(
|
self.report.add_warning(
|
||||||
'structure',
|
'structure',
|
||||||
'references/ directory is empty - no reference documentation found',
|
'references/ directory is empty - no reference documentation found',
|
||||||
@@ -298,7 +298,7 @@ class SkillQualityChecker:
|
|||||||
|
|
||||||
# Check reference files
|
# Check reference files
|
||||||
if self.references_dir.exists():
|
if self.references_dir.exists():
|
||||||
ref_files = list(self.references_dir.glob('*.md'))
|
ref_files = list(self.references_dir.rglob('*.md'))
|
||||||
if ref_files:
|
if ref_files:
|
||||||
self.report.add_info(
|
self.report.add_info(
|
||||||
'content',
|
'content',
|
||||||
|
|||||||
@@ -129,6 +129,17 @@ class UnifiedScraper:
|
|||||||
'max_pages': source.get('max_pages', 100)
|
'max_pages': source.get('max_pages', 100)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Pass through llms.txt settings (so unified configs behave the same as doc_scraper configs)
|
||||||
|
if 'llms_txt_url' in source:
|
||||||
|
doc_config['llms_txt_url'] = source.get('llms_txt_url')
|
||||||
|
|
||||||
|
if 'skip_llms_txt' in source:
|
||||||
|
doc_config['skip_llms_txt'] = source.get('skip_llms_txt')
|
||||||
|
|
||||||
|
# Optional: support overriding start URLs
|
||||||
|
if 'start_urls' in source:
|
||||||
|
doc_config['start_urls'] = source.get('start_urls')
|
||||||
|
|
||||||
# Write temporary config
|
# Write temporary config
|
||||||
temp_config_path = os.path.join(self.data_dir, 'temp_docs_config.json')
|
temp_config_path = os.path.join(self.data_dir, 'temp_docs_config.json')
|
||||||
with open(temp_config_path, 'w', encoding='utf-8') as f:
|
with open(temp_config_path, 'w', encoding='utf-8') as f:
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ discrepancies transparently.
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
import shutil
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Any, Optional
|
from typing import Dict, List, Any, Optional
|
||||||
@@ -286,12 +287,42 @@ This skill combines knowledge from multiple sources:
|
|||||||
docs_dir = os.path.join(self.skill_dir, 'references', 'documentation')
|
docs_dir = os.path.join(self.skill_dir, 'references', 'documentation')
|
||||||
os.makedirs(docs_dir, exist_ok=True)
|
os.makedirs(docs_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Best-effort: copy docs-only reference files into unified docs references.
|
||||||
|
# UnifiedScraper runs doc_scraper using name "{name}_docs", which creates
|
||||||
|
# output/{name}_docs/references/*.md. Those are the most useful documentation
|
||||||
|
# references for the unified skill.
|
||||||
|
source_refs_dir = os.path.join('output', f"{self.name}_docs", 'references')
|
||||||
|
copied_files: List[str] = []
|
||||||
|
|
||||||
|
if os.path.isdir(source_refs_dir):
|
||||||
|
for entry in sorted(os.listdir(source_refs_dir)):
|
||||||
|
src_path = os.path.join(source_refs_dir, entry)
|
||||||
|
dst_path = os.path.join(docs_dir, entry)
|
||||||
|
if not os.path.isfile(src_path):
|
||||||
|
continue
|
||||||
|
shutil.copy2(src_path, dst_path)
|
||||||
|
copied_files.append(entry)
|
||||||
|
|
||||||
# Create index
|
# Create index
|
||||||
index_path = os.path.join(docs_dir, 'index.md')
|
index_path = os.path.join(docs_dir, 'index.md')
|
||||||
with open(index_path, 'w') as f:
|
with open(index_path, 'w', encoding='utf-8') as f:
|
||||||
f.write("# Documentation\n\n")
|
f.write("# Documentation\n\n")
|
||||||
f.write("Reference from official documentation.\n\n")
|
f.write("Reference from official documentation.\n\n")
|
||||||
|
|
||||||
|
if copied_files:
|
||||||
|
files_no_index = [p for p in copied_files if p.lower() != 'index.md']
|
||||||
|
files_index = [p for p in copied_files if p.lower() == 'index.md']
|
||||||
|
|
||||||
|
f.write("## Files\n\n")
|
||||||
|
for filename in files_no_index + files_index:
|
||||||
|
f.write(f"- [{filename}]({filename})\n")
|
||||||
|
else:
|
||||||
|
f.write("## Notes\n\n")
|
||||||
|
f.write(
|
||||||
|
"No documentation reference files were copied into this unified skill. "
|
||||||
|
"This usually means the docs-only build did not produce reference files.\n"
|
||||||
|
)
|
||||||
|
|
||||||
logger.info("Created documentation references")
|
logger.info("Created documentation references")
|
||||||
|
|
||||||
def _generate_github_references(self):
|
def _generate_github_references(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user