simple test case

2026-01-02 18:20:03 +05:30
parent 55245cf241
commit fa14a79ec9
3 changed files with 96 additions and 0 deletions
--- a/tests/__init__.py
+++ b/tests/__init__.py
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,45 @@
+import pytest
+import httpx
+
+from omniread.core.content import ContentType
+from omniread.html.scraper import HTMLScraper
+
+
+TEST_HTML = b"""
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>Test Page</title>
+    <meta name="description" content="Simple test page">
+  </head>
+  <body>
+    <div id="content">Hello World</div>
+    <a href="https://example.com">Link</a>
+  </body>
+</html>
+"""  # Body returned by mock_transport: a title, a meta description, a #content div and one link.
+
+
+def mock_transport(request: httpx.Request) -> httpx.Response:
+    """Reply to any request with a 200 response whose body is TEST_HTML.
+
+    Handler for httpx.MockTransport; Content-Type is ContentType.HTML.value.
+    """
+    headers = {"Content-Type": ContentType.HTML.value}
+    reply = httpx.Response(
+        status_code=200, headers=headers, content=TEST_HTML, request=request
+    )
+    return reply
+
+
+@pytest.fixture
+def http_scraper(request: pytest.FixtureRequest) -> HTMLScraper:
+    """Return an HTMLScraper whose HTTP client is answered by mock_transport."""
+    client = httpx.Client(transport=httpx.MockTransport(mock_transport))
+    request.addfinalizer(client.close)  # close the client at fixture teardown
+
+    # Patch scraper to use our mocked client
+    scraper = HTMLScraper()
+    scraper._client = client  # intentional test-only override
+
+    return scraper
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -0,0 +1,51 @@
+from typing import Optional
+
+from pydantic import BaseModel
+from bs4 import Tag
+
+from omniread.html.parser import HTMLParser
+from omniread.core.content import Content
+
+
+class ParsedHTML(BaseModel):  # typed result returned by TestHTMLParser.parse()
+    title: Optional[str]  # "title" entry of parse_meta()
+    description: Optional[str]  # "description" entry of the parsed meta tags, if any
+    content: Optional[str]  # parse_div() of the div with id="content", else None
+    link: Optional[str]  # parse_link() of the first <a> tag, else None
+
+
+class TestHTMLParser(HTMLParser[ParsedHTML]):
+    """Concrete HTML parser with explicit Pydantic return type."""
+
+    __test__ = False  # helper, not a test case: the Test* name would make pytest collect it
+
+    def parse(self) -> ParsedHTML:
+        """Return meta title/description, #content div and first link; None if absent."""
+        soup = self._soup
+        meta = self.parse_meta()
+        content_div = soup.find("div", id="content")
+        link_tag: Tag | None = soup.find("a")
+
+        return ParsedHTML(
+            title=meta["title"],
+            description=meta["meta"].get("description"),
+            content=self.parse_div(content_div) if content_div else None,
+            link=self.parse_link(link_tag) if link_tag else None,
+        )
+
+
+def test_end_to_end_html_scrape_and_parse(http_scraper):
+    """Fetch the mocked page through the scraper fixture, then parse it."""
+    # Scrape: the scraper is real, only its transport is mocked.
+    fetched: Content = http_scraper.fetch("https://test.local")
+
+    # Parse the fetched content with the concrete parser defined above.
+    parsed = TestHTMLParser(fetched).parse()
+
+    # The result must be the typed model, with every field extracted.
+    assert isinstance(parsed, ParsedHTML)
+
+    assert parsed.title == "Test Page"
+    assert parsed.description == "Simple test page"
+    assert parsed.content == "Hello World"
+    assert parsed.link == "https://example.com"