fix: improve URL parsing and add test mocking for llms.txt detector

This commit is contained in:
Edgar I.
2025-10-24 13:18:30 +04:00
parent 8f44193b61
commit 60fefb6c0b
2 changed files with 49 additions and 7 deletions

View File

@@ -3,6 +3,7 @@
import requests
from typing import Optional, Dict
from urllib.parse import urlparse
class LlmsTxtDetector:
"""Detect llms.txt files at documentation URLs"""
@@ -23,9 +24,11 @@ class LlmsTxtDetector:
Returns:
Dict with 'url' and 'variant' keys, or None if not found
"""
parsed = urlparse(self.base_url)
root_url = f"{parsed.scheme}://{parsed.netloc}"
for filename, variant in self.VARIANTS:
# Try at base URL root
url = f"{self.base_url.split('/docs')[0]}/{filename}"
url = f"{root_url}/{filename}"
if self._check_url_exists(url):
return {'url': url, 'variant': variant}