fix: improve URL parsing and add test mocking for llms.txt detector
This commit is contained in:
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
from typing import Optional, Dict
|
from typing import Optional, Dict
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
class LlmsTxtDetector:
|
class LlmsTxtDetector:
|
||||||
"""Detect llms.txt files at documentation URLs"""
|
"""Detect llms.txt files at documentation URLs"""
|
||||||
@@ -23,9 +24,11 @@ class LlmsTxtDetector:
|
|||||||
Returns:
|
Returns:
|
||||||
Dict with 'url' and 'variant' keys, or None if not found
|
Dict with 'url' and 'variant' keys, or None if not found
|
||||||
"""
|
"""
|
||||||
|
parsed = urlparse(self.base_url)
|
||||||
|
root_url = f"{parsed.scheme}://{parsed.netloc}"
|
||||||
|
|
||||||
for filename, variant in self.VARIANTS:
|
for filename, variant in self.VARIANTS:
|
||||||
# Try at base URL root
|
url = f"{root_url}/{filename}"
|
||||||
url = f"{self.base_url.split('/docs')[0]}/{filename}"
|
|
||||||
|
|
||||||
if self._check_url_exists(url):
|
if self._check_url_exists(url):
|
||||||
return {'url': url, 'variant': variant}
|
return {'url': url, 'variant': variant}
|
||||||
|
|||||||
@@ -1,13 +1,52 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
from unittest.mock import patch, Mock
|
||||||
from cli.llms_txt_detector import LlmsTxtDetector
|
from cli.llms_txt_detector import LlmsTxtDetector
|
||||||
|
|
||||||
def test_detect_llms_txt_variants():
    """Test detection of llms.txt file variants.

    ``requests.head`` is patched to answer 200 for every URL, so the test
    never touches the network. With every probe succeeding, the detector is
    expected to report the ``full`` variant located at the site root rather
    than under ``/docs``.
    """
    detector = LlmsTxtDetector("https://hono.dev/docs")

    # Patch the name where the detector module looks it up, not in
    # ``requests`` itself, so only the detector's HEAD calls are intercepted.
    with patch('cli.llms_txt_detector.requests.head') as mock_head:
        mock_response = Mock()
        mock_response.status_code = 200
        mock_head.return_value = mock_response

        variants = detector.detect()

        assert variants is not None
        # Pin the exact result instead of only checking that the keys exist.
        assert variants['url'] == 'https://hono.dev/llms-full.txt'
        assert variants['variant'] == 'full'
        # Guard against the detector short-circuiting without probing.
        mock_head.assert_called()
|
||||||
|
|
||||||
|
def test_detect_no_llms_txt():
    """Detection yields ``None`` when every probed URL answers 404."""
    detector = LlmsTxtDetector("https://example.com/docs")
    not_found = Mock(status_code=404)

    # Every HEAD request issued by the detector module receives the 404 stub.
    with patch(
        'cli.llms_txt_detector.requests.head',
        return_value=not_found,
    ) as head_stub:
        outcome = detector.detect()

        assert outcome is None
        # One probe per variant: all three should have been attempted.
        assert head_stub.call_count == 3
|
||||||
|
|
||||||
|
def test_url_parsing_with_complex_paths():
    """A deeply nested docs URL is still probed at the site root."""
    detector = LlmsTxtDetector("https://example.com/docs/v2/guide")
    expected_url = 'https://example.com/llms-full.txt'

    # Every HEAD request issued by the detector module receives a 200 stub.
    with patch(
        'cli.llms_txt_detector.requests.head',
        return_value=Mock(status_code=200),
    ) as head_stub:
        found = detector.detect()

        assert found is not None
        assert found['url'] == expected_url
        # The most recent HEAD call must target the root-level file with the
        # request options the test expects.
        head_stub.assert_called_with(
            expected_url,
            timeout=5,
            allow_redirects=True,
        )
|
||||||
|
|||||||
Reference in New Issue
Block a user