feat: support explicit llms_txt_url in config

This commit is contained in:
Edgar I.
2025-10-24 13:40:36 +04:00
parent 0b6c2ed593
commit 99a40d3a1b
2 changed files with 38 additions and 0 deletions

View File

@@ -341,6 +341,33 @@ class DocToSkillConverter:
Returns:
True if llms.txt was found and parsed successfully
"""
# Check for explicit config URL first
explicit_url = self.config.get('llms_txt_url')
if explicit_url:
print(f"\n📌 Using explicit llms_txt_url from config: {explicit_url}")
downloader = LlmsTxtDownloader(explicit_url)
content = downloader.download()
if not content:
print("⚠️ Failed to download, falling back to auto-detection")
# Continue to auto-detection below
else:
# Parse and save (same as auto-detected flow)
parser = LlmsTxtParser(content)
pages = parser.parse()
if pages:
print(f"📄 Parsed {len(pages)} sections")
for page in pages:
self.save_page(page)
self.pages.append(page)
self.llms_txt_detected = True
self.llms_txt_variant = 'explicit'
return True
# Original auto-detection logic continues...
print(f"\n🔍 Checking for llms.txt at {self.base_url}...")
# Detect llms.txt

View File

@@ -296,6 +296,17 @@ class TestConfigValidation(unittest.TestCase):
url_errors = [e for e in errors if 'start_url' in e.lower()]
self.assertEqual(len(url_errors), 0, "Valid start_urls should pass validation")
def test_config_with_llms_txt_url(self):
    """A config dict can carry an explicit ``llms_txt_url`` entry.

    Builds a minimal config containing ``name``, ``llms_txt_url`` and
    ``base_url`` and checks that the explicit URL is read back unchanged.
    """
    expected_url = 'https://example.com/llms-full.txt'
    config = {
        'name': 'test',
        'llms_txt_url': expected_url,
        'base_url': 'https://example.com/docs',
    }

    # NOTE(review): this only confirms the dict round-trip; no validator is
    # invoked here, so an invalid llms_txt_url would not be caught.
    actual_url = config.get('llms_txt_url')
    self.assertEqual(actual_url, expected_url)
# Run the test suite only when this file is executed directly as a script,
# not when it is imported by another module or a test runner.
if __name__ == '__main__':
unittest.main()