simple test case

This commit is contained in:
2026-01-02 18:20:03 +05:30
parent 55245cf241
commit fa14a79ec9
3 changed files with 96 additions and 0 deletions

0
tests/__init__.py Normal file
View File

45
tests/conftest.py Normal file
View File

@@ -0,0 +1,45 @@
from collections.abc import Iterator

import httpx
import pytest

from omniread.core.content import ContentType
from omniread.html.scraper import HTMLScraper
# Canned HTML document served as the response body by the mock transport
# handler below. It carries a <title>, a "description" <meta> tag, a
# <div id="content"> and a single <a> link.
TEST_HTML = b"""
<!DOCTYPE html>
<html>
<head>
<title>Test Page</title>
<meta name="description" content="Simple test page">
</head>
<body>
<div id="content">Hello World</div>
<a href="https://example.com">Link</a>
</body>
</html>
"""
def mock_transport(request: httpx.Request) -> httpx.Response:
    """
    Handler callable for ``httpx.MockTransport``.

    Every request, regardless of URL or method, is answered with the same
    canned page.

    Args:
        request: The outgoing request; it is attached to the response as-is.

    Returns:
        A 200 response whose body is ``TEST_HTML`` and whose ``Content-Type``
        header is ``ContentType.HTML.value``.
    """
    html_headers = {"Content-Type": ContentType.HTML.value}
    return httpx.Response(
        200,
        headers=html_headers,
        content=TEST_HTML,
        request=request,
    )
@pytest.fixture
def http_scraper() -> Iterator[HTMLScraper]:
    """
    Provide an ``HTMLScraper`` whose HTTP client uses a mocked transport.

    The scraper itself is the real implementation; only its ``_client``
    attribute is replaced with an ``httpx.Client`` built on
    ``httpx.MockTransport(mock_transport)``.

    Yields:
        The patched scraper. The mocked client is closed during fixture
        teardown, after the test using it has finished.
    """
    transport = httpx.MockTransport(mock_transport)

    # Use the client as a context manager so it is always closed on teardown
    # instead of being left open for the rest of the test session.
    with httpx.Client(transport=transport) as client:
        # Patch scraper to use our mocked client
        scraper = HTMLScraper()
        scraper._client = client  # intentional test-only override
        yield scraper

51
tests/test_html.py Normal file
View File

@@ -0,0 +1,51 @@
from typing import Optional
from pydantic import BaseModel
from bs4 import Tag
from omniread.html.parser import HTMLParser
from omniread.core.content import Content
class ParsedHTML(BaseModel):
    """
    Pydantic model holding the values extracted from a parsed HTML page.

    All four fields are annotated ``Optional[str]``.
    """

    # Page title.
    title: Optional[str]

    # Value of the "description" meta entry.
    description: Optional[str]

    # Parsed form of the ``<div id="content">`` element.
    content: Optional[str]

    # Parsed form of the first ``<a>`` element.
    link: Optional[str]
class TestHTMLParser(HTMLParser[ParsedHTML]):
    """
    Concrete HTML parser with explicit Pydantic return type.

    Used by the end-to-end test to turn scraped content into a
    ``ParsedHTML`` instance.
    """

    # The class name matches pytest's default ``Test*`` collection pattern,
    # but this is a helper that is constructed with an argument, not a test
    # class. Opt out of collection so pytest does not warn about it.
    __test__ = False

    def parse(self) -> ParsedHTML:
        """
        Build a ``ParsedHTML`` from the parser's soup.

        Reads ``title`` and the ``description`` meta entry from
        ``self.parse_meta()``, and looks up ``<div id="content">`` and the
        first ``<a>`` element in ``self._soup``.

        Returns:
            A ``ParsedHTML`` whose ``content`` / ``link`` are ``None`` when
            the corresponding element is not found.
        """
        soup = self._soup
        meta = self.parse_meta()

        content_div = soup.find("div", id="content")
        link_tag: Tag | None = soup.find("a")

        return ParsedHTML(
            title=meta["title"],
            description=meta["meta"].get("description"),
            # parse_div / parse_link are only called for elements that exist.
            content=self.parse_div(content_div) if content_div else None,
            link=self.parse_link(link_tag) if link_tag else None,
        )
def test_end_to_end_html_scrape_and_parse(http_scraper):
    """
    Fetch a page through the scraper fixture, parse it, and check each field.
    """
    # Scrape: real scraper, mocked transport supplied by the fixture.
    fetched: Content = http_scraper.fetch("https://test.local")

    # Parse the fetched content in one step.
    parsed = TestHTMLParser(fetched).parse()

    # The parser must hand back the Pydantic model with the expected values.
    assert isinstance(parsed, ParsedHTML)
    assert "Test Page" == parsed.title
    assert "Simple test page" == parsed.description
    assert "Hello World" == parsed.content
    assert "https://example.com" == parsed.link