From 3dd928b34be769eef92aa0f91ec52b087c7ac5f6 Mon Sep 17 00:00:00 2001
From: "Edgar I."
Date: Fri, 24 Oct 2025 13:25:34 +0400
Subject: [PATCH] feat: add llms.txt downloader with error handling

---
 cli/llms_txt_downloader.py        | 57 +++++++++++++++++++++++++++++++
 tests/test_llms_txt_downloader.py | 46 +++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 cli/llms_txt_downloader.py
 create mode 100644 tests/test_llms_txt_downloader.py

diff --git a/cli/llms_txt_downloader.py b/cli/llms_txt_downloader.py
new file mode 100644
index 0000000..1ce60ca
--- /dev/null
+++ b/cli/llms_txt_downloader.py
@@ -0,0 +1,57 @@
+"""ABOUTME: Downloads llms.txt files from documentation URLs with error handling
+ABOUTME: Handles timeouts and request errors, and validates content before returning
+"""
+
+from typing import Optional
+
+import requests
+
+
+class LlmsTxtDownloader:
+    """Download llms.txt content from URLs"""
+
+    # Responses with less text than this are rejected as empty or placeholder pages
+    MIN_CONTENT_LENGTH = 100
+
+    def __init__(self, url: str, timeout: int = 30):
+        """
+        Store the download target and request timeout.
+
+        Args:
+            url: Address of the llms.txt file to fetch
+            timeout: Timeout in seconds passed to requests.get
+        """
+        self.url = url
+        self.timeout = timeout
+
+    def download(self) -> Optional[str]:
+        """
+        Download llms.txt content.
+
+        Returns:
+            String content, or None if the request fails or the response
+            is too short to be a real llms.txt file
+        """
+        headers = {
+            'User-Agent': 'Skill-Seekers-llms.txt-Reader/1.0'
+        }
+
+        try:
+            response = requests.get(
+                self.url,
+                headers=headers,
+                timeout=self.timeout
+            )
+            response.raise_for_status()
+            content = response.text
+        except requests.RequestException as e:
+            print(f"❌ Failed to download {self.url}: {e}")
+            return None
+
+        # Length is the only sanity check applied; surrounding whitespace is
+        # ignored so a blank or near-empty page does not count as content
+        if len(content.strip()) < self.MIN_CONTENT_LENGTH:
+            print(f"❌ Content from {self.url} is too short to be a valid llms.txt")
+            return None
+
+        return content
diff --git a/tests/test_llms_txt_downloader.py b/tests/test_llms_txt_downloader.py
new file mode 100644
index 0000000..a902b57
--- /dev/null
+++ b/tests/test_llms_txt_downloader.py
@@ -0,0 +1,46 @@
+from unittest.mock import Mock, patch
+
+import requests
+
+from cli.llms_txt_downloader import LlmsTxtDownloader
+
+TEST_URL = "https://hono.dev/llms-full.txt"
+
+
+def _response(text):
+    """Build a stand-in for a successful requests response"""
+    response = Mock()
+    response.text = text
+    return response
+
+
+def test_download_llms_txt():
+    """Test downloading llms.txt content"""
+    downloader = LlmsTxtDownloader(TEST_URL)
+
+    # Patch the HTTP call so the test does not depend on the network
+    with patch("cli.llms_txt_downloader.requests.get") as mock_get:
+        mock_get.return_value = _response("# Hono\n\n" + "docs " * 40)
+        content = downloader.download()
+
+    assert content is not None
+    assert len(content) > 100  # Should have substantial content
+    assert isinstance(content, str)
+
+
+def test_download_rejects_short_content():
+    """Test that near-empty responses are treated as failures"""
+    downloader = LlmsTxtDownloader(TEST_URL)
+
+    with patch("cli.llms_txt_downloader.requests.get") as mock_get:
+        mock_get.return_value = _response("too short")
+        assert downloader.download() is None
+
+
+def test_download_returns_none_on_request_error():
+    """Test that request errors yield None instead of raising"""
+    downloader = LlmsTxtDownloader(TEST_URL)
+
+    with patch("cli.llms_txt_downloader.requests.get") as mock_get:
+        mock_get.side_effect = requests.ConnectionError("boom")
+        assert downloader.download() is None