feat(testing): add end-to-end HTML scraping and parsing tests with typed parsers

- Add smart httpx MockTransport routing based on endpoint paths
- Render HTML fixtures via Jinja templates populated from JSON data
- Introduce explicit, typed HTML parsers for semantic and table-based content
- Add end-to-end tests covering scraper → content → parser → Pydantic models
- Enforce explicit output contracts and avoid default dict-based parsing
2026-01-02 18:31:34 +05:30
parent fa14a79ec9
commit 07293e4651
8 changed files with 156 additions and 31 deletions
--- a/tests/test_html_table.py
+++ b/tests/test_html_table.py
@@ -0,0 +1,44 @@
+from typing import Optional
+
+from pydantic import BaseModel
+
+from omniread.html.parser import HTMLParser
+from omniread.core.content import Content
+
+
+class ParsedTableHTML(BaseModel):
+    """Typed result returned by ``TableHTMLParser.parse``."""
+
+    # Stripped text of the document's <title>; None when the parser found none.
+    title: Optional[str]
+    # Rows produced by ``parse_table`` for the located <table>; [] when absent.
+    table: list[list[str]]
+
+
+class TableHTMLParser(HTMLParser[ParsedTableHTML]):
+    """
+    Parser focused on extracting tabular data.
+
+    Produces a ``ParsedTableHTML`` holding the page title and the rows of
+    the table returned by ``soup.find("table")``.
+    """
+
+    def parse(self) -> ParsedTableHTML:
+        """
+        Build a ``ParsedTableHTML`` from the parsed document.
+
+        Returns:
+            A model whose ``title`` is the stripped ``<title>`` text (``None``
+            when the title tag is missing or exposes no text via ``.string``)
+            and whose ``table`` is the result of ``self.parse_table`` for the
+            located table (``[]`` when the document has no ``<table>``).
+        """
+        soup = self._soup
+
+        # NOTE(review): assuming ``_soup`` is a BeautifulSoup tree, ``.string``
+        # is None for an empty <title> or one containing nested markup, so
+        # guard it before calling ``.strip()`` rather than raising
+        # AttributeError on such pages.
+        title_tag = soup.title
+        title_text = title_tag.string if title_tag else None
+
+        table_tag = soup.find("table")
+
+        return ParsedTableHTML(
+            title=title_text.strip() if title_text is not None else None,
+            table=self.parse_table(table_tag) if table_tag else [],
+        )
+
+
+def test_end_to_end_html_table(http_scraper):
+    """Fetch ``/table`` through the scraper fixture and verify the typed parse result."""
+    expected_rows = [
+        ["Name", "Age", "City"],
+        ["Alice", "30", "London"],
+        ["Bob", "25", "New York"],
+        ["Charlie", "35", "Berlin"],
+    ]
+
+    # Scraper -> Content -> parser -> Pydantic model, exercised in one pass.
+    content: Content = http_scraper.fetch("https://test.local/table")
+    result = TableHTMLParser(content).parse()
+
+    assert isinstance(result, ParsedTableHTML)
+    assert result.title == "Table Test Page"
+    assert result.table == expected_rows