simple test case
This commit is contained in:
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
45
tests/conftest.py
Normal file
45
tests/conftest.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from collections.abc import Iterator

import httpx
import pytest

from omniread.core.content import ContentType
from omniread.html.scraper import HTMLScraper
|
||||
|
||||
|
||||
# Minimal HTML document served by the mocked transport. It contains exactly
# the elements the end-to-end test inspects: a <title>, a "description" meta
# tag, a <div id="content"> and a single <a> link.
TEST_HTML = b"""
<!DOCTYPE html>
<html>
<head>
<title>Test Page</title>
<meta name="description" content="Simple test page">
</head>
<body>
<div id="content">Hello World</div>
<a href="https://example.com">Link</a>
</body>
</html>
"""
|
||||
|
||||
|
||||
def mock_transport(request: httpx.Request) -> httpx.Response:
    """
    Handler for ``httpx.MockTransport``.

    Answers any request with a 200 response whose body is ``TEST_HTML`` and
    whose Content-Type header is the HTML content type.
    """
    response_headers = {"Content-Type": ContentType.HTML.value}
    return httpx.Response(
        200,
        headers=response_headers,
        content=TEST_HTML,
        request=request,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def http_scraper() -> Iterator[HTMLScraper]:
    """
    Yield an ``HTMLScraper`` whose client is backed by ``mock_transport``.

    The scraper's ``_client`` attribute is replaced with an ``httpx.Client``
    built on ``httpx.MockTransport``, so requests sent through that client
    are answered in-process by ``mock_transport``. The client is closed
    during fixture teardown so it is not leaked between tests.
    """
    transport = httpx.MockTransport(mock_transport)
    client = httpx.Client(transport=transport)

    try:
        # Patch scraper to use our mocked client
        scraper = HTMLScraper()
        scraper._client = client  # intentional test-only override

        yield scraper
    finally:
        # Runs after the requesting test finishes, pass or fail.
        client.close()
|
||||
51
tests/test_html.py
Normal file
51
tests/test_html.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
from bs4 import Tag
|
||||
|
||||
from omniread.html.parser import HTMLParser
|
||||
from omniread.core.content import Content
|
||||
|
||||
|
||||
class ParsedHTML(BaseModel):
    """Structured result returned by ``TestHTMLParser.parse``."""

    # Page title, taken from the parser's metadata (``meta["title"]``).
    title: Optional[str]
    # Value of the "description" entry in the parsed metadata, if present.
    description: Optional[str]
    # Parsed form of the ``<div id="content">`` element; None when absent.
    content: Optional[str]
    # Parsed form of the first ``<a>`` element; None when absent.
    link: Optional[str]
|
||||
|
||||
|
||||
class TestHTMLParser(HTMLParser[ParsedHTML]):
    """
    Concrete HTML parser with explicit Pydantic return type.

    This is a helper used by the tests in this module, not a test suite.
    """

    # The ``Test`` prefix matches pytest's default class-discovery pattern.
    # Opt out explicitly so pytest does not try to collect this parser,
    # which takes a constructor argument and would only produce a
    # collection warning.
    __test__ = False

    def parse(self) -> ParsedHTML:
        """
        Build a ``ParsedHTML`` from the parsed document.

        Title and description come from ``parse_meta()``; ``content`` is
        taken from the ``<div id="content">`` element and ``link`` from the
        first ``<a>`` element. Either of the latter two is ``None`` when the
        element is not found.
        """
        soup = self._soup
        meta = self.parse_meta()

        content_div = soup.find("div", id="content")
        link_tag: Optional[Tag] = soup.find("a")

        return ParsedHTML(
            title=meta["title"],
            description=meta["meta"].get("description"),
            content=self.parse_div(content_div) if content_div is not None else None,
            link=self.parse_link(link_tag) if link_tag is not None else None,
        )
|
||||
|
||||
|
||||
def test_end_to_end_html_scrape_and_parse(http_scraper):
    """Fetch through the mocked scraper, parse, and check every field."""
    # Scrape: real scraper, mocked transport.
    fetched: Content = http_scraper.fetch("https://test.local")

    # Parse the fetched content into the Pydantic model.
    parsed = TestHTMLParser(fetched).parse()

    assert isinstance(parsed, ParsedHTML)

    # Expected value of each model field for the canned test page.
    expected_fields = {
        "title": "Test Page",
        "description": "Simple test page",
        "content": "Hello World",
        "link": "https://example.com",
    }
    for field_name, expected_value in expected_fields.items():
        assert getattr(parsed, field_name) == expected_value
|
||||
Reference in New Issue
Block a user