Fix: include docs references in unified skill output (#213)

* Fix: include docs references in unified skill output

* Fix: quality checker counts nested reference files

* fix(unified): pass through llms_txt_url and skip_llms_txt to doc scraper

* configs: add svelte CLI unified preset (llms.txt + categories)

---------

Co-authored-by: Chris Engelhard <chris@chrisengelhard.nl>
This commit is contained in:
Chris Engelhard
2026-01-01 17:40:51 +01:00
committed by GitHub
parent 98d73611ad
commit 9949cdcdca
4 changed files with 113 additions and 3 deletions

View File

@@ -0,0 +1,68 @@
{
"name": "svelte-cli",
"description": "Svelte CLI: docs (llms.txt) + GitHub repository (commands, project scaffolding, dev/build workflows).",
"merge_mode": "rule-based",
"sources": [
{
"type": "documentation",
"base_url": "https://svelte.dev/docs/cli",
"llms_txt_url": "https://svelte.dev/docs/cli/llms.txt",
"extract_api": true,
"selectors": {
"main_content": "#main, main",
"title": "h1",
"code_blocks": "pre code, pre"
},
"url_patterns": {
"include": ["/docs/cli"],
"exclude": [
"/docs/kit",
"/docs/svelte",
"/docs/mcp",
"/tutorial",
"/packages",
"/playground",
"/blog"
]
},
"categories": {
"overview": ["overview"],
"faq": ["frequently asked questions"],
"sv_create": ["sv create"],
"sv_add": ["sv add"],
"sv_check": ["sv check"],
"sv_migrate": ["sv migrate"],
"devtools_json": ["devtools-json"],
"drizzle": ["drizzle"],
"eslint": ["eslint"],
"lucia": ["lucia"],
"mcp": ["mcp"],
"mdsvex": ["mdsvex"],
"paraglide": ["paraglide"],
"playwright": ["playwright"],
"prettier": ["prettier"],
"storybook": ["storybook"],
"sveltekit_adapter": ["sveltekit-adapter"],
"tailwindcss": ["tailwindcss"],
"vitest": ["vitest"]
},
"rate_limit": 0.5,
"max_pages": 200
},
{
"type": "github",
"repo": "sveltejs/cli",
"include_issues": true,
"max_issues": 150,
"include_changelog": true,
"include_releases": true,
"include_code": true,
"code_analysis_depth": "deep",
"file_patterns": [
"src/**/*.ts",
"src/**/*.js"
],
"local_repo_path": "local_paths/sveltekit/cli"
}
]
}

View File

@@ -146,7 +146,7 @@ class SkillQualityChecker:
'references/ directory not found - skill may be incomplete', 'references/ directory not found - skill may be incomplete',
str(self.references_dir) str(self.references_dir)
) )
elif not list(self.references_dir.glob('*.md')): elif not list(self.references_dir.rglob('*.md')):
self.report.add_warning( self.report.add_warning(
'structure', 'structure',
'references/ directory is empty - no reference documentation found', 'references/ directory is empty - no reference documentation found',
@@ -298,7 +298,7 @@ class SkillQualityChecker:
# Check reference files # Check reference files
if self.references_dir.exists(): if self.references_dir.exists():
ref_files = list(self.references_dir.glob('*.md')) ref_files = list(self.references_dir.rglob('*.md'))
if ref_files: if ref_files:
self.report.add_info( self.report.add_info(
'content', 'content',

View File

@@ -129,6 +129,17 @@ class UnifiedScraper:
'max_pages': source.get('max_pages', 100) 'max_pages': source.get('max_pages', 100)
} }
# Pass through llms.txt settings (so unified configs behave the same as doc_scraper configs)
if 'llms_txt_url' in source:
doc_config['llms_txt_url'] = source.get('llms_txt_url')
if 'skip_llms_txt' in source:
doc_config['skip_llms_txt'] = source.get('skip_llms_txt')
# Optional: support overriding start URLs
if 'start_urls' in source:
doc_config['start_urls'] = source.get('start_urls')
# Write temporary config # Write temporary config
temp_config_path = os.path.join(self.data_dir, 'temp_docs_config.json') temp_config_path = os.path.join(self.data_dir, 'temp_docs_config.json')
with open(temp_config_path, 'w', encoding='utf-8') as f: with open(temp_config_path, 'w', encoding='utf-8') as f:

View File

@@ -14,6 +14,7 @@ discrepancies transparently.
import os import os
import json import json
import shutil
import logging import logging
from pathlib import Path from pathlib import Path
from typing import Dict, List, Any, Optional from typing import Dict, List, Any, Optional
@@ -286,12 +287,42 @@ This skill combines knowledge from multiple sources:
docs_dir = os.path.join(self.skill_dir, 'references', 'documentation') docs_dir = os.path.join(self.skill_dir, 'references', 'documentation')
os.makedirs(docs_dir, exist_ok=True) os.makedirs(docs_dir, exist_ok=True)
# Best-effort: copy docs-only reference files into unified docs references.
# UnifiedScraper runs doc_scraper using name "{name}_docs", which creates
# output/{name}_docs/references/*.md. Those are the most useful documentation
# references for the unified skill.
source_refs_dir = os.path.join('output', f"{self.name}_docs", 'references')
copied_files: List[str] = []
if os.path.isdir(source_refs_dir):
for entry in sorted(os.listdir(source_refs_dir)):
src_path = os.path.join(source_refs_dir, entry)
dst_path = os.path.join(docs_dir, entry)
if not os.path.isfile(src_path):
continue
shutil.copy2(src_path, dst_path)
copied_files.append(entry)
# Create index # Create index
index_path = os.path.join(docs_dir, 'index.md') index_path = os.path.join(docs_dir, 'index.md')
with open(index_path, 'w') as f: with open(index_path, 'w', encoding='utf-8') as f:
f.write("# Documentation\n\n") f.write("# Documentation\n\n")
f.write("Reference from official documentation.\n\n") f.write("Reference from official documentation.\n\n")
if copied_files:
files_no_index = [p for p in copied_files if p.lower() != 'index.md']
files_index = [p for p in copied_files if p.lower() == 'index.md']
f.write("## Files\n\n")
for filename in files_no_index + files_index:
f.write(f"- [{filename}]({filename})\n")
else:
f.write("## Notes\n\n")
f.write(
"No documentation reference files were copied into this unified skill. "
"This usually means the docs-only build did not produce reference files.\n"
)
logger.info("Created documentation references") logger.info("Created documentation references")
def _generate_github_references(self): def _generate_github_references(self):