feat: add llms.txt detection module

This commit is contained in:
Edgar I.
2025-10-24 13:11:12 +04:00
parent 691318117c
commit 8f44193b61
2 changed files with 54 additions and 0 deletions

41
cli/llms_txt_detector.py Normal file
View File

@@ -0,0 +1,41 @@
# ABOUTME: Detects and validates llms.txt file availability at documentation URLs
# ABOUTME: Supports llms-full.txt, llms.txt, and llms-small.txt variants
import requests
from typing import Optional, Dict
class LlmsTxtDetector:
"""Detect llms.txt files at documentation URLs"""
VARIANTS = [
('llms-full.txt', 'full'),
('llms.txt', 'standard'),
('llms-small.txt', 'small')
]
def __init__(self, base_url: str):
self.base_url = base_url.rstrip('/')
def detect(self) -> Optional[Dict[str, str]]:
"""
Detect available llms.txt variant.
Returns:
Dict with 'url' and 'variant' keys, or None if not found
"""
for filename, variant in self.VARIANTS:
# Try at base URL root
url = f"{self.base_url.split('/docs')[0]}/{filename}"
if self._check_url_exists(url):
return {'url': url, 'variant': variant}
return None
def _check_url_exists(self, url: str) -> bool:
"""Check if URL returns 200 status"""
try:
response = requests.head(url, timeout=5, allow_redirects=True)
return response.status_code == 200
except requests.RequestException:
return False

View File

@@ -0,0 +1,13 @@
import pytest
from cli.llms_txt_detector import LlmsTxtDetector
def test_detect_llms_txt_variants():
"""Test detection of llms.txt file variants"""
detector = LlmsTxtDetector("https://hono.dev/docs")
# Mock responses
variants = detector.detect()
assert variants is not None
assert 'url' in variants
assert 'variant' in variants # 'full', 'standard', 'small'