Files
omniread/tests/conftest.py
Vishesh 'ironeagle' Bangotra 07293e4651 feat(testing): add end-to-end HTML scraping and parsing tests with typed parsers
- Add smart httpx MockTransport routing based on endpoint paths
- Render HTML fixtures via Jinja templates populated from JSON data
- Introduce explicit, typed HTML parsers for semantic and table-based content
- Add end-to-end tests covering scraper → content → parser → Pydantic models
- Enforce explicit output contracts and avoid default dict-based parsing
2026-01-02 18:31:34 +05:30

66 lines
1.5 KiB
Python

import json
from collections.abc import Iterator
from pathlib import Path

import httpx
import pytest
from jinja2 import Environment, BaseLoader

from omniread.core.content import ContentType
from omniread.html.scraper import HTMLScraper
# Directory next to this file that holds the Jinja templates and JSON data
# files rendered by the mock transport handler.
MOCK_DIR = Path(__file__).parent / "mocks"
def render_html(template_path, data_path) -> bytes:
    """
    Render a Jinja template file using a JSON file as its context.

    Both files are read as UTF-8. The parsed JSON document is unpacked as
    keyword arguments to the template, so its top level must be an object.

    Returns the rendered markup encoded as UTF-8 bytes.
    """
    source = Path(template_path).read_text(encoding="utf-8")
    context = json.loads(Path(data_path).read_text(encoding="utf-8"))
    # String-backed environment with autoescaping off: values from the JSON
    # data are inserted into the markup verbatim.
    jinja_env = Environment(loader=BaseLoader(), autoescape=False)
    return jinja_env.from_string(source).render(**context).encode("utf-8")
def mock_transport(request: httpx.Request) -> httpx.Response:
    """
    httpx MockTransport handler that routes on the request URL path.

    ``/simple`` and ``/table`` are answered with a 200 response whose body is
    the matching template under MOCK_DIR rendered with its JSON data file;
    every other path gets a 404 response with the body ``Not Found``.
    """
    # Known endpoint path -> (template file, JSON data file) under MOCK_DIR.
    fixtures = {
        "/simple": ("simple.html.jinja", "simple.json"),
        "/table": ("table.html.jinja", "table.json"),
    }

    matched = fixtures.get(request.url.path)
    if matched is None:
        return httpx.Response(
            status_code=404,
            content=b"Not Found",
            request=request,
        )

    template_name, data_name = matched
    body = render_html(MOCK_DIR / template_name, MOCK_DIR / data_name)
    return httpx.Response(
        status_code=200,
        headers={"Content-Type": ContentType.HTML.value},
        content=body,
        request=request,
    )
@pytest.fixture
def http_scraper() -> Iterator[HTMLScraper]:
    """
    Provide an HTMLScraper whose HTTP client is served by mock_transport.

    The client is opened in a context manager and the scraper is yielded
    rather than returned, so the client is closed when the requesting test
    finishes instead of never being closed explicitly.
    """
    transport = httpx.MockTransport(mock_transport)
    with httpx.Client(transport=transport) as client:
        # Code after the yield (here, the end of the with block) runs as
        # fixture teardown.
        yield HTMLScraper(client=client)