Files
skill-seekers-reference/tests/test_browser_renderer.py
yusyus 00c72ea4a3 fix: resolve CI failures across all GitHub Actions workflows
- Fix ruff format issue in doc_scraper.py
- Add pytest skip markers for browser renderer tests when Playwright is
  not installed in CI
- Replace broken Python heredocs in 4 workflow YAML files
  (scheduled-updates, vector-db-export, quality-metrics, test-vector-dbs)
  with python3 -c calls to fix YAML parsing errors

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-29 20:40:45 +03:00

165 lines
5.4 KiB
Python

"""Tests for browser_renderer.py (#321).
Real end-to-end tests using actual Playwright + Chromium.
"""
from __future__ import annotations
import pytest
from skill_seekers.cli.browser_renderer import (
BrowserRenderer,
_auto_install_chromium,
_check_playwright_available,
)
# Skip all real browser tests when Playwright is not installed
_has_playwright = _check_playwright_available()
requires_playwright = pytest.mark.skipif(
not _has_playwright,
reason="Playwright not installed (pip install 'skill-seekers[browser]')",
)
@requires_playwright
class TestPlaywrightAvailability:
"""Test that playwright is properly detected."""
def test_playwright_is_available(self):
assert _check_playwright_available() is True
def test_auto_install_succeeds(self):
# Chromium is already installed, so this should be a no-op success
assert _auto_install_chromium() is True
@requires_playwright
class TestBrowserRendererReal:
"""Real end-to-end tests with actual Chromium."""
def test_render_simple_page(self):
"""Render a real page and get HTML back."""
with BrowserRenderer() as renderer:
html = renderer.render_page("https://example.com")
assert "<html" in html.lower()
assert "Example Domain" in html
def test_render_returns_js_content(self):
"""Verify that JS-generated content is captured (not just the shell)."""
with BrowserRenderer() as renderer:
html = renderer.render_page("https://example.com")
# example.com has static content, but the point is we get real HTML
assert len(html) > 500
assert "<body" in html.lower()
def test_multiple_pages_reuse_browser(self):
"""Rendering multiple pages should reuse the same browser instance."""
with BrowserRenderer() as renderer:
html1 = renderer.render_page("https://example.com")
html2 = renderer.render_page("https://example.com")
assert "Example Domain" in html1
assert "Example Domain" in html2
def test_close_cleans_up(self):
"""After close(), internal state is None."""
renderer = BrowserRenderer()
renderer.render_page("https://example.com")
assert renderer._browser is not None
renderer.close()
assert renderer._browser is None
assert renderer._context is None
assert renderer._playwright is None
def test_context_manager_cleans_up(self):
"""Context manager calls close on exit."""
with BrowserRenderer() as renderer:
renderer.render_page("https://example.com")
assert renderer._browser is not None
assert renderer._browser is None
def test_timeout_parameter(self):
"""Custom timeout is respected."""
renderer = BrowserRenderer(timeout=5000)
assert renderer._timeout == 5000
renderer.close()
def test_wait_until_parameter(self):
"""Custom wait_until is respected."""
renderer = BrowserRenderer(wait_until="domcontentloaded")
assert renderer._wait_until == "domcontentloaded"
renderer.close()
class TestDocScraperBrowserIntegration:
"""Test that doc_scraper correctly accepts browser config."""
def test_browser_mode_config_sets_attribute(self):
from skill_seekers.cli.doc_scraper import DocToSkillConverter
config = {
"name": "test",
"base_url": "https://example.com",
"browser": True,
"selectors": {},
"url_patterns": {"include": [], "exclude": []},
}
scraper = DocToSkillConverter(config)
assert scraper.browser_mode is True
assert scraper._browser_renderer is None
def test_browser_mode_default_false(self):
from skill_seekers.cli.doc_scraper import DocToSkillConverter
config = {
"name": "test",
"base_url": "https://example.com",
"selectors": {},
"url_patterns": {"include": [], "exclude": []},
}
scraper = DocToSkillConverter(config)
assert scraper.browser_mode is False
@requires_playwright
def test_render_with_browser_returns_html(self):
"""Test the _render_with_browser helper directly."""
from skill_seekers.cli.doc_scraper import DocToSkillConverter
config = {
"name": "test",
"base_url": "https://example.com",
"browser": True,
"selectors": {},
"url_patterns": {"include": [], "exclude": []},
}
scraper = DocToSkillConverter(config)
html = scraper._render_with_browser("https://example.com")
assert "Example Domain" in html
assert scraper._browser_renderer is not None
# Clean up
scraper._browser_renderer.close()
class TestBrowserArgument:
"""Test --browser argument is registered in CLI."""
def test_scrape_parser_accepts_browser_flag(self):
from skill_seekers.cli.doc_scraper import setup_argument_parser
parser = setup_argument_parser()
args = parser.parse_args(["--name", "test", "--url", "https://example.com", "--browser"])
assert args.browser is True
def test_scrape_parser_browser_default_false(self):
from skill_seekers.cli.doc_scraper import setup_argument_parser
parser = setup_argument_parser()
args = parser.parse_args(["--name", "test", "--url", "https://example.com"])
assert args.browser is False