46 lines
972 B
Python
46 lines
972 B
Python
from typing import Iterator

import httpx
import pytest

from omniread.core.content import ContentType
from omniread.html.scraper import HTMLScraper
|
|
|
|
|
|
# Minimal, well-formed HTML page used as the canned response body returned by
# ``mock_transport``. It is a bytes literal because it is handed straight to
# ``httpx.Response(content=...)``.
TEST_HTML = b"""
<!DOCTYPE html>
<html>
  <head>
    <title>Test Page</title>
    <meta name="description" content="Simple test page">
  </head>
  <body>
    <div id="content">Hello World</div>
    <a href="https://example.com">Link</a>
  </body>
</html>
"""
|
|
|
|
|
|
def mock_transport(request: httpx.Request) -> httpx.Response:
    """
    Request handler intended for ``httpx.MockTransport``.

    Every request, regardless of its contents, is answered with a 200
    response whose body is ``TEST_HTML`` and whose ``Content-Type`` header is
    ``ContentType.HTML.value``. The originating request is attached to the
    response.
    """
    response_headers = {"Content-Type": ContentType.HTML.value}
    return httpx.Response(
        status_code=200,
        headers=response_headers,
        content=TEST_HTML,
        request=request,
    )
|
|
|
|
|
|
@pytest.fixture
def http_scraper() -> Iterator[HTMLScraper]:
    """
    Yield an ``HTMLScraper`` wired to a mocked ``httpx`` client.

    An ``httpx.Client`` is built on ``httpx.MockTransport(mock_transport)`` and
    assigned to the scraper's private ``_client`` attribute, so that the
    scraper talks to the mock handler instead of the network (assuming
    ``_client`` is the client the scraper sends requests through).

    This is a generator fixture: the mocked client is closed after the
    requesting test finishes instead of being left open.
    """
    transport = httpx.MockTransport(mock_transport)
    client = httpx.Client(transport=transport)
    try:
        # Patch scraper to use our mocked client
        # NOTE(review): if HTMLScraper() creates its own client on
        # construction, that client is discarded by the override below
        # without being closed -- confirm against the scraper implementation.
        scraper = HTMLScraper()
        scraper._client = client  # intentional test-only override
        yield scraper
    finally:
        # Runs at fixture teardown, and also if building the scraper fails.
        client.close()
|