feat: add get_proper_filename() for .txt to .md conversion
This commit is contained in:
@@ -13,6 +13,29 @@ class LlmsTxtDownloader:
|
|||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.max_retries = max_retries
|
self.max_retries = max_retries
|
||||||
|
|
||||||
|
def get_proper_filename(self) -> str:
    """
    Derive a local filename from ``self.url``, turning ``.txt`` into ``.md``.

    Only the path component of the URL is considered, so query strings and
    fragments never end up in the result. The last non-empty path segment
    is used, which means a trailing slash does not produce an empty name.
    The ``.txt`` suffix is matched case-insensitively; any other name is
    returned unchanged.

    Returns:
        The last path segment with a trailing ``.txt`` replaced by ``.md``,
        the unchanged segment if it does not end in ``.txt``, or an empty
        string when the URL path contains no segment at all.

    Examples:
        https://hono.dev/llms-full.txt -> llms-full.md
        https://hono.dev/llms.txt -> llms.md
        https://hono.dev/llms-small.txt -> llms-small.md
        https://hono.dev/llms.txt/ -> llms.md
        https://hono.dev/LLMS.TXT -> LLMS.md
    """
    from urllib.parse import urlparse

    # Strip trailing slashes first so ".../llms.txt/" still yields "llms.txt"
    # instead of the empty segment after the final "/".
    path = urlparse(self.url).path
    filename = path.rstrip('/').rsplit('/', 1)[-1]

    # Compare only the suffix, lower-cased, so ".TXT" / ".Txt" are converted
    # too while the stem keeps its original casing.
    if filename[-4:].lower() == '.txt':
        filename = filename[:-4] + '.md'

    return filename
|
||||||
|
|
||||||
def _is_markdown(self, content: str) -> bool:
|
def _is_markdown(self, content: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if content looks like markdown.
|
Check if content looks like markdown.
|
||||||
|
|||||||
@@ -143,3 +143,28 @@ def test_user_agent_header():
|
|||||||
# Verify custom user agent was passed
|
# Verify custom user agent was passed
|
||||||
call_kwargs = mock_get.call_args[1]
|
call_kwargs = mock_get.call_args[1]
|
||||||
assert call_kwargs['headers']['User-Agent'] == 'Skill-Seekers-llms.txt-Reader/1.0'
|
assert call_kwargs['headers']['User-Agent'] == 'Skill-Seekers-llms.txt-Reader/1.0'
|
||||||
|
|
||||||
|
def test_get_proper_filename():
    """The llms-full.txt URL is mapped to a .md filename."""
    source_url = "https://hono.dev/llms-full.txt"
    result = LlmsTxtDownloader(source_url).get_proper_filename()

    assert result == "llms-full.md"
    # Guard against the original .txt suffix surviving the conversion.
    assert not result.endswith('.txt')
|
||||||
|
|
||||||
|
def test_get_proper_filename_standard():
    """The plain llms.txt URL is mapped to llms.md."""
    source_url = "https://hono.dev/llms.txt"
    result = LlmsTxtDownloader(source_url).get_proper_filename()

    assert result == "llms.md"
|
||||||
|
|
||||||
|
def test_get_proper_filename_small():
    """The llms-small.txt URL is mapped to llms-small.md."""
    source_url = "https://hono.dev/llms-small.txt"
    result = LlmsTxtDownloader(source_url).get_proper_filename()

    assert result == "llms-small.md"
|
||||||
|
|||||||
Reference in New Issue
Block a user