simple test case

2026-01-02 18:20:03 +05:30
parent 55245cf241
commit fa14a79ec9
3 changed files with 96 additions and 0 deletions
--- a/tests/__init__.py
+++ b/tests/__init__.py
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,45 @@
 from typing import Iterator

 import httpx
 import pytest

 from omniread.core.content import ContentType
 from omniread.html.scraper import HTMLScraper
 # Canned HTML document (as bytes) that ``mock_transport`` returns as the body
 # of every response. It contains a <title>, a "description" <meta> tag, a
 # <div id="content"> and a single <a href> link.
 TEST_HTML = b"""
 <!DOCTYPE html>
 <html>
  <head>
    <title>Test Page</title>
    <meta name="description" content="Simple test page">
  </head>
  <body>
    <div id="content">Hello World</div>
    <a href="https://example.com">Link</a>
  </body>
 </html>
 """
 def mock_transport(request: httpx.Request) -> httpx.Response:
    """
    Handler for ``httpx.MockTransport`` that answers every request the same way.

    Args:
        request: The outgoing request; it is attached to the returned response.

    Returns:
        A 200 response whose body is ``TEST_HTML`` and whose ``Content-Type``
        header is ``ContentType.HTML.value``.
    """
    html_headers = {"Content-Type": ContentType.HTML.value}
    return httpx.Response(
        status_code=200,
        headers=html_headers,
        content=TEST_HTML,
        request=request,
    )
@pytest.fixture
def http_scraper() -> Iterator[HTMLScraper]:
    """
    Yield an ``HTMLScraper`` whose HTTP client is backed by ``mock_transport``.

    The ``httpx.Client`` is opened as a context manager, so it is closed
    during fixture teardown instead of being left open after the test.

    Yields:
        The scraper with its private ``_client`` attribute replaced by the
        mocked client.
    """
    transport = httpx.MockTransport(mock_transport)
    with httpx.Client(transport=transport) as client:
        # Patch scraper to use our mocked client.
        # NOTE(review): if HTMLScraper() opens its own client in __init__,
        # that instance is replaced here without being closed - confirm.
        scraper = HTMLScraper()
        scraper._client = client  # intentional test-only override
        yield scraper
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -0,0 +1,51 @@
 from typing import Optional
 from pydantic import BaseModel
 from bs4 import Tag
 from omniread.html.parser import HTMLParser
 from omniread.core.content import Content
 class ParsedHTML(BaseModel):
    """
    Pydantic model holding the values extracted from a parsed HTML page.

    Every field is annotated ``Optional[str]``, so ``None`` is an accepted
    value for each of them.
    """
    # Populated from ``meta["title"]`` in ``TestHTMLParser.parse``.
    title: Optional[str]
    # Populated from ``meta["meta"].get("description")``.
    description: Optional[str]
    # Result of ``parse_div`` on the ``<div id="content">`` element, or None.
    content: Optional[str]
    # Result of ``parse_link`` on the first ``<a>`` element, or None.
    link: Optional[str]
 class TestHTMLParser(HTMLParser[ParsedHTML]):
    """
    Concrete HTML parser with explicit Pydantic return type.

    Despite the ``Test`` prefix this is a helper class, not a test case.
    """

    # The class name matches pytest's default ``Test*`` collection pattern.
    # Opt out explicitly so pytest neither tries to collect it nor emits a
    # collection warning about a test class with a constructor.
    __test__ = False

    def parse(self) -> ParsedHTML:
        """
        Build a ``ParsedHTML`` from the parsed document.

        ``title`` and ``description`` come from the mapping returned by
        ``parse_meta()``. ``content`` is ``parse_div`` applied to the
        ``<div id="content">`` element and ``link`` is ``parse_link`` applied
        to the first ``<a>`` element; each is ``None`` when the element is
        not found.

        Returns:
            The populated ``ParsedHTML`` model.
        """
        soup = self._soup
        meta = self.parse_meta()
        content_div = soup.find("div", id="content")
        link_tag: Optional[Tag] = soup.find("a")
        return ParsedHTML(
            title=meta["title"],
            description=meta["meta"].get("description"),
            # ``find`` returns None on a miss; compare against None explicitly.
            content=self.parse_div(content_div) if content_div is not None else None,
            link=self.parse_link(link_tag) if link_tag is not None else None,
        )
 def test_end_to_end_html_scrape_and_parse(http_scraper):
    """
    Fetch a page through the ``http_scraper`` fixture, parse it with
    ``TestHTMLParser`` and check every extracted field.
    """
    # Scrape step: real scraper object, transport supplied by the fixture.
    fetched: Content = http_scraper.fetch("https://test.local")

    # Parse step.
    parsed = TestHTMLParser(fetched).parse()

    # Verification step: type first, then each field in declaration order.
    assert isinstance(parsed, ParsedHTML)
    expected_fields = (
        ("title", "Test Page"),
        ("description", "Simple test page"),
        ("content", "Hello World"),
        ("link", "https://example.com"),
    )
    for field_name, wanted in expected_fields:
        assert getattr(parsed, field_name) == wanted