feat: add llms.txt downloader with error handling

This commit is contained in:
Edgar I.
2025-10-24 13:25:34 +04:00
parent a18ea8cf68
commit 3dd928b34b
2 changed files with 55 additions and 0 deletions

View File

@@ -0,0 +1,43 @@
"""ABOUTME: Downloads llms.txt files from documentation URLs with error handling"""
"""ABOUTME: Handles timeouts and HTTP errors, and rejects too-short responses before returning"""
import requests
from typing import Optional
class LlmsTxtDownloader:
    """Download llms.txt content from a URL.

    A download is considered successful only when the HTTP request
    completes without raising ``requests.RequestException`` (this includes
    non-2xx statuses, via ``raise_for_status``) and the response body is at
    least ``min_length`` characters long.
    """

    def __init__(self, url: str, timeout: int = 30, *, min_length: int = 100):
        """Store the download settings; no network access happens here.

        Args:
            url: Address of the llms.txt file to fetch.
            timeout: Value passed as ``timeout`` to ``requests.get``.
            min_length: Minimum number of characters the response body must
                have to be accepted. Shorter bodies are treated as a
                failed download.
        """
        self.url = url
        self.timeout = timeout
        self.min_length = min_length

    def _is_substantial(self, content: str) -> bool:
        """Return True when *content* has at least ``min_length`` characters."""
        return len(content) >= self.min_length

    def download(self) -> Optional[str]:
        """Download llms.txt content.

        Returns:
            The response body as a string, or None when the request fails
            or the body is shorter than ``min_length`` characters.
        """
        try:
            headers = {
                'User-Agent': 'Skill-Seekers-llms.txt-Reader/1.0'
            }
            response = requests.get(
                self.url,
                headers=headers,
                timeout=self.timeout
            )
            # Turn 4xx/5xx responses into an exception handled below.
            response.raise_for_status()
            content = response.text
        except requests.RequestException as e:
            print(f"❌ Failed to download {self.url}: {e}")
            return None

        # Only the length is checked: an empty or near-empty body is
        # rejected silently. The format of the content is not inspected.
        if not self._is_substantial(content):
            return None
        return content

View File

@@ -0,0 +1,12 @@
import pytest
from cli.llms_txt_downloader import LlmsTxtDownloader
def test_download_llms_txt():
    """Check that a live download of hono.dev's llms-full.txt yields a sizeable string."""
    fetched = LlmsTxtDownloader("https://hono.dev/llms-full.txt").download()

    # A failed download is reported as None, so rule that out first.
    assert fetched is not None
    assert len(fetched) > 100  # Should have substantial content
    assert isinstance(fetched, str)