diff --git a/tests/test_llms_txt_downloader.py b/tests/test_llms_txt_downloader.py
index 3b945fc..bcdc4dc 100644
--- a/tests/test_llms_txt_downloader.py
+++ b/tests/test_llms_txt_downloader.py
@@ -168,3 +168,95 @@ def test_get_proper_filename_small():
filename = downloader.get_proper_filename()
assert filename == "llms-small.md"
+
+def test_is_markdown_rejects_html_doctype():
+ """Test that HTML with DOCTYPE is rejected (prevents redirect trap)"""
+ downloader = LlmsTxtDownloader("https://example.com/llms.txt")
+
+ html = '
Product PageContent'
+ assert not downloader._is_markdown(html)
+
+ # Test case-insensitive
+ html_uppercase = 'Content'
+ assert not downloader._is_markdown(html_uppercase)
+
+def test_is_markdown_rejects_html_tag():
+ """Test that HTML with tag is rejected (prevents redirect trap)"""
+ downloader = LlmsTxtDownloader("https://example.com/llms.txt")
+
+ html = 'Content'
+ assert not downloader._is_markdown(html)
+
+ # Test with just opening tag
+ html_partial = 'Some content'
+ assert not downloader._is_markdown(html_partial)
+
+def test_is_markdown_rejects_html_meta():
+ """Test that HTML with or tags is rejected"""
+ downloader = LlmsTxtDownloader("https://example.com/llms.txt")
+
+ html_with_head = 'PageContent'
+ assert not downloader._is_markdown(html_with_head)
+
+ html_with_meta = ''
+ assert not downloader._is_markdown(html_with_meta)
+
+def test_is_markdown_accepts_markdown_with_html_words():
+ """Test that markdown mentioning 'html' word is still accepted"""
+ downloader = LlmsTxtDownloader("https://example.com/llms.txt")
+
+ markdown = '# Guide\n\nLearn about html tags in markdown. You can write HTML inside markdown.'
+ assert downloader._is_markdown(markdown)
+
+ # Test with actual markdown patterns
+ markdown_with_code = '# HTML Tutorial\n\n```html\nexample
\n```\n\n## More content'
+ assert downloader._is_markdown(markdown_with_code)
+
+def test_html_detection_only_scans_first_500_chars():
+ """Test that HTML detection only scans first 500 characters for performance"""
+ downloader = LlmsTxtDownloader("https://example.com/llms.txt")
+
+ # HTML tag after 500 chars should not be detected
+ safe_markdown = '# Header\n\n' + ('Valid markdown content. ' * 50) + '\n\n'
+ # This should pass because is beyond first 500 chars
+ if len(safe_markdown[:500]) < len(''):
+ # If the HTML is within 500 chars, adjust test
+ assert not downloader._is_markdown(safe_markdown)
+ else:
+ # HTML beyond 500 chars should not trigger rejection
+ assert downloader._is_markdown(safe_markdown)
+
+def test_html_redirect_trap_scenario():
+ """Test real-world scenario: llms.txt redirects to HTML product page"""
+ downloader = LlmsTxtDownloader("https://example.com/llms.txt")
+
+ # Simulate Claude Code redirect scenario (302 to HTML page)
+ html_product_page = '''
+
+
+
+ Claude Code - Product Page
+
+
+ Claude Code
+ Product information...
+
+'''
+
+ # Should reject this HTML even though it has tag (looks like markdown "# ")
+ assert not downloader._is_markdown(html_product_page)
+
+def test_download_rejects_html_redirect():
+ """Test that download() properly rejects HTML redirects"""
+ downloader = LlmsTxtDownloader("https://example.com/llms.txt")
+
+ mock_response = Mock()
+ # Simulate server returning HTML instead of markdown
+ mock_response.text = 'Product Page
'
+ mock_response.raise_for_status = Mock()
+
+ with patch('requests.get', return_value=mock_response):
+ content = downloader.download()
+
+ # Should return None (rejected as non-markdown)
+ assert content is None