Fix: include docs references in unified skill output (#213)
* Fix: include docs references in unified skill output
* Fix: quality checker counts nested reference files
* fix(unified): pass through llms_txt_url and skip_llms_txt to doc scraper
* configs: add svelte CLI unified preset (llms.txt + categories)

---------

Co-authored-by: Chris Engelhard <chris@chrisengelhard.nl>
This commit is contained in:
68
configs/svelte_cli_unified.json
Normal file
68
configs/svelte_cli_unified.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"name": "svelte-cli",
|
||||
"description": "Svelte CLI: docs (llms.txt) + GitHub repository (commands, project scaffolding, dev/build workflows).",
|
||||
"merge_mode": "rule-based",
|
||||
"sources": [
|
||||
{
|
||||
"type": "documentation",
|
||||
"base_url": "https://svelte.dev/docs/cli",
|
||||
"llms_txt_url": "https://svelte.dev/docs/cli/llms.txt",
|
||||
"extract_api": true,
|
||||
"selectors": {
|
||||
"main_content": "#main, main",
|
||||
"title": "h1",
|
||||
"code_blocks": "pre code, pre"
|
||||
},
|
||||
"url_patterns": {
|
||||
"include": ["/docs/cli"],
|
||||
"exclude": [
|
||||
"/docs/kit",
|
||||
"/docs/svelte",
|
||||
"/docs/mcp",
|
||||
"/tutorial",
|
||||
"/packages",
|
||||
"/playground",
|
||||
"/blog"
|
||||
]
|
||||
},
|
||||
"categories": {
|
||||
"overview": ["overview"],
|
||||
"faq": ["frequently asked questions"],
|
||||
"sv_create": ["sv create"],
|
||||
"sv_add": ["sv add"],
|
||||
"sv_check": ["sv check"],
|
||||
"sv_migrate": ["sv migrate"],
|
||||
"devtools_json": ["devtools-json"],
|
||||
"drizzle": ["drizzle"],
|
||||
"eslint": ["eslint"],
|
||||
"lucia": ["lucia"],
|
||||
"mcp": ["mcp"],
|
||||
"mdsvex": ["mdsvex"],
|
||||
"paraglide": ["paraglide"],
|
||||
"playwright": ["playwright"],
|
||||
"prettier": ["prettier"],
|
||||
"storybook": ["storybook"],
|
||||
"sveltekit_adapter": ["sveltekit-adapter"],
|
||||
"tailwindcss": ["tailwindcss"],
|
||||
"vitest": ["vitest"]
|
||||
},
|
||||
"rate_limit": 0.5,
|
||||
"max_pages": 200
|
||||
},
|
||||
{
|
||||
"type": "github",
|
||||
"repo": "sveltejs/cli",
|
||||
"include_issues": true,
|
||||
"max_issues": 150,
|
||||
"include_changelog": true,
|
||||
"include_releases": true,
|
||||
"include_code": true,
|
||||
"code_analysis_depth": "deep",
|
||||
"file_patterns": [
|
||||
"src/**/*.ts",
|
||||
"src/**/*.js"
|
||||
],
|
||||
"local_repo_path": "local_paths/sveltekit/cli"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -146,7 +146,7 @@ class SkillQualityChecker:
|
||||
'references/ directory not found - skill may be incomplete',
|
||||
str(self.references_dir)
|
||||
)
|
||||
elif not list(self.references_dir.glob('*.md')):
|
||||
elif not list(self.references_dir.rglob('*.md')):
|
||||
self.report.add_warning(
|
||||
'structure',
|
||||
'references/ directory is empty - no reference documentation found',
|
||||
@@ -298,7 +298,7 @@ class SkillQualityChecker:
|
||||
|
||||
# Check reference files
|
||||
if self.references_dir.exists():
|
||||
ref_files = list(self.references_dir.glob('*.md'))
|
||||
ref_files = list(self.references_dir.rglob('*.md'))
|
||||
if ref_files:
|
||||
self.report.add_info(
|
||||
'content',
|
||||
|
||||
@@ -129,6 +129,17 @@ class UnifiedScraper:
|
||||
'max_pages': source.get('max_pages', 100)
|
||||
}
|
||||
|
||||
# Pass through llms.txt settings (so unified configs behave the same as doc_scraper configs)
|
||||
if 'llms_txt_url' in source:
|
||||
doc_config['llms_txt_url'] = source.get('llms_txt_url')
|
||||
|
||||
if 'skip_llms_txt' in source:
|
||||
doc_config['skip_llms_txt'] = source.get('skip_llms_txt')
|
||||
|
||||
# Optional: support overriding start URLs
|
||||
if 'start_urls' in source:
|
||||
doc_config['start_urls'] = source.get('start_urls')
|
||||
|
||||
# Write temporary config
|
||||
temp_config_path = os.path.join(self.data_dir, 'temp_docs_config.json')
|
||||
with open(temp_config_path, 'w', encoding='utf-8') as f:
|
||||
|
||||
@@ -14,6 +14,7 @@ discrepancies transparently.
|
||||
|
||||
import os
|
||||
import json
|
||||
import shutil
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Any, Optional
|
||||
@@ -286,12 +287,42 @@ This skill combines knowledge from multiple sources:
|
||||
docs_dir = os.path.join(self.skill_dir, 'references', 'documentation')
|
||||
os.makedirs(docs_dir, exist_ok=True)
|
||||
|
||||
# Best-effort: copy docs-only reference files into unified docs references.
|
||||
# UnifiedScraper runs doc_scraper using name "{name}_docs", which creates
|
||||
# output/{name}_docs/references/*.md. Those are the most useful documentation
|
||||
# references for the unified skill.
|
||||
source_refs_dir = os.path.join('output', f"{self.name}_docs", 'references')
|
||||
copied_files: List[str] = []
|
||||
|
||||
if os.path.isdir(source_refs_dir):
|
||||
for entry in sorted(os.listdir(source_refs_dir)):
|
||||
src_path = os.path.join(source_refs_dir, entry)
|
||||
dst_path = os.path.join(docs_dir, entry)
|
||||
if not os.path.isfile(src_path):
|
||||
continue
|
||||
shutil.copy2(src_path, dst_path)
|
||||
copied_files.append(entry)
|
||||
|
||||
# Create index
|
||||
index_path = os.path.join(docs_dir, 'index.md')
|
||||
with open(index_path, 'w') as f:
|
||||
with open(index_path, 'w', encoding='utf-8') as f:
|
||||
f.write("# Documentation\n\n")
|
||||
f.write("Reference from official documentation.\n\n")
|
||||
|
||||
if copied_files:
|
||||
files_no_index = [p for p in copied_files if p.lower() != 'index.md']
|
||||
files_index = [p for p in copied_files if p.lower() == 'index.md']
|
||||
|
||||
f.write("## Files\n\n")
|
||||
for filename in files_no_index + files_index:
|
||||
f.write(f"- [{filename}]({filename})\n")
|
||||
else:
|
||||
f.write("## Notes\n\n")
|
||||
f.write(
|
||||
"No documentation reference files were copied into this unified skill. "
|
||||
"This usually means the docs-only build did not produce reference files.\n"
|
||||
)
|
||||
|
||||
logger.info("Created documentation references")
|
||||
|
||||
def _generate_github_references(self):
|
||||
|
||||
Reference in New Issue
Block a user