simple test case

This commit is contained in:
2026-01-02 18:20:03 +05:30
parent 55245cf241
commit fa14a79ec9
3 changed files with 96 additions and 0 deletions

51
tests/test_html.py Normal file
View File

@@ -0,0 +1,51 @@
from typing import Optional
from pydantic import BaseModel
from bs4 import Tag
from omniread.html.parser import HTMLParser
from omniread.core.content import Content
class ParsedHTML(BaseModel):
    """Structured result returned by the HTML parser under test.

    Every field is nullable and explicitly defaults to ``None``. The
    explicit default matters: a bare ``Optional[str]`` annotation is
    implicitly optional in Pydantic v1 but is a *required* (nullable) field
    in Pydantic v2. Spelling out the default gives the model the same
    behavior under both major versions and keeps it constructible from
    partial data.
    """

    # Page title, or None when unavailable.
    title: Optional[str] = None
    # Page description, or None when unavailable.
    description: Optional[str] = None
    # Extracted main content, or None when unavailable.
    content: Optional[str] = None
    # Extracted link, or None when unavailable.
    link: Optional[str] = None
class TestHTMLParser(HTMLParser[ParsedHTML]):
    """
    Concrete HTML parser with explicit Pydantic return type.

    This is a helper for the tests in this module, not a test case itself.
    """

    # The "Test" prefix matches pytest's default class-collection pattern.
    # Opt out explicitly so pytest does not try to collect this helper (and
    # warn that it cannot, because the class is built with a constructor
    # argument).
    __test__ = False

    def parse(self) -> ParsedHTML:
        """Build a ``ParsedHTML`` from the document held by this parser.

        The title and description come from ``self.parse_meta()``; the
        content comes from the first ``<div id="content">`` and the link
        from the first ``<a>`` element found in ``self._soup``.

        Returns:
            A ``ParsedHTML`` whose ``content`` / ``link`` are ``None`` when
            the corresponding element is not present in the document.
        """
        soup = self._soup
        meta = self.parse_meta()

        content_div = soup.find("div", id="content")
        link_tag: Tag | None = soup.find("a")

        # Compare against None explicitly: ``find`` signals "no match" with
        # None, and the check should not depend on element truthiness.
        return ParsedHTML(
            title=meta["title"],
            description=meta["meta"].get("description"),
            content=(
                self.parse_div(content_div) if content_div is not None else None
            ),
            link=self.parse_link(link_tag) if link_tag is not None else None,
        )
def test_end_to_end_html_scrape_and_parse(http_scraper):
    """Fetch a page through the scraper fixture and parse it end to end.

    The ``http_scraper`` fixture supplies the scraper; the fetched content
    is handed to ``TestHTMLParser`` and the parsed fields are compared
    against the values expected for the test page.
    """
    # Scrape step (real scraper, mocked transport).
    fetched: Content = http_scraper.fetch("https://test.local")

    # Parse step.
    parsed = TestHTMLParser(fetched).parse()

    # Verify the return type first, then every field in declaration order.
    assert isinstance(parsed, ParsedHTML)
    actual_fields = (
        parsed.title,
        parsed.description,
        parsed.content,
        parsed.link,
    )
    expected_fields = (
        "Test Page",
        "Simple test page",
        "Hello World",
        "https://example.com",
    )
    assert actual_fields == expected_fields