"""Pytest fixtures and mock backends for exercising the omniread scrapers.

HTML pages are rendered from Jinja2 templates plus JSON data stored under
``mocks/html`` and served through an in-process ``httpx.MockTransport``.
PDF documents are read from fixture files stored under ``mocks/pdf``.
"""

import json
from pathlib import Path
from typing import Iterator

import httpx
import pytest
from jinja2 import Environment, BaseLoader

from omniread import (
    # core
    ContentType,
    # html
    HTMLScraper,
    # pdf
    FileSystemPDFClient,
    PDFScraper,
)


MOCK_HTML_DIR = Path(__file__).parent / "mocks" / "html"
MOCK_PDF_DIR = Path(__file__).parent / "mocks" / "pdf"

# URL paths the mock HTTP transport answers; any other path yields a 404.
_MOCK_HTML_ROUTES = ("/simple", "/table")

# Logical identifiers that MockPDFClient maps to "<identifier>.pdf" fixtures.
_MOCK_PDF_ROUTES = ("simple",)


def render_html(template_path, data_path) -> bytes:
    """Render a Jinja2 template file with a JSON context into UTF-8 bytes.

    Args:
        template_path: Path of the template file (read as UTF-8 text).
        data_path: Path of a JSON file (read as UTF-8 text) whose decoded
            content is unpacked as keyword arguments into the template, so
            it must decode to a mapping.

    Returns:
        The rendered document encoded as UTF-8.
    """
    template_text = Path(template_path).read_text(encoding="utf-8")
    data = json.loads(Path(data_path).read_text(encoding="utf-8"))

    # The template text is supplied directly via ``from_string``; autoescape
    # is disabled so the data is inserted into the markup verbatim.
    env = Environment(
        loader=BaseLoader(),
        autoescape=False,
    )
    rendered = env.from_string(template_text).render(**data)
    return rendered.encode("utf-8")


def mock_transport(request: httpx.Request) -> httpx.Response:
    """Handle a request on behalf of ``httpx.MockTransport``.

    Known routes (``/simple`` and ``/table``) are answered with a 200
    response whose body is rendered from ``<endpoint>.html.jinja`` and
    ``<endpoint>.json`` in ``MOCK_HTML_DIR``. Every other path gets a 404
    response with the body ``b"Not Found"``.
    """
    path = request.url.path

    if path not in _MOCK_HTML_ROUTES:
        return httpx.Response(
            status_code=404,
            content=b"Not Found",
            request=request,
        )

    # The last path segment doubles as the fixture file stem.
    endpoint = path.split("/")[-1]
    content = render_html(
        MOCK_HTML_DIR / f"{endpoint}.html.jinja",
        MOCK_HTML_DIR / f"{endpoint}.json",
    )

    return httpx.Response(
        status_code=200,
        headers={"Content-Type": ContentType.HTML.value},
        content=content,
        request=request,
    )


@pytest.fixture
def http_scraper() -> Iterator[HTMLScraper]:
    """Yield an ``HTMLScraper`` whose HTTP client is served by ``mock_transport``.

    The underlying ``httpx.Client`` is closed during fixture teardown so
    that it is not left open after each test.
    """
    transport = httpx.MockTransport(mock_transport)
    with httpx.Client(transport=transport) as client:
        yield HTMLScraper(client=client)


class MockPDFClient(FileSystemPDFClient):
    """
    Test-only PDF client that routes logical identifiers to fixture files.
    """

    def fetch(self, source: str) -> bytes:
        """Fetch the fixture PDF registered for the logical name ``source``.

        Args:
            source: Logical identifier of the fixture (currently only
                ``"simple"``).

        Returns:
            Whatever the parent ``fetch`` returns for
            ``MOCK_PDF_DIR / "<source>.pdf"``.

        Raises:
            FileNotFoundError: If ``source`` is not a known identifier.
        """
        if source not in _MOCK_PDF_ROUTES:
            raise FileNotFoundError(f"No mock PDF route for '{source}'")

        # Use a separate name instead of rebinding the ``str`` parameter to
        # a ``Path``.
        fixture_path = MOCK_PDF_DIR / f"{source}.pdf"
        return super().fetch(fixture_path)


@pytest.fixture
def pdf_scraper() -> PDFScraper:
    """Return a ``PDFScraper`` backed by ``MockPDFClient``."""
    client = MockPDFClient()
    return PDFScraper(client=client)