"""Shared pytest fixtures and mock back-ends for the scraper tests.

HTML requests are answered in-process by an ``httpx.MockTransport`` handler
that renders Jinja templates stored under ``mocks/html``. PDF requests are
routed by ``MockPDFClient`` to fixture files stored under ``mocks/pdf``.
"""

import json
from pathlib import Path
from typing import Iterator

import httpx
import pytest
from jinja2 import BaseLoader, Environment

from omniread.core.content import ContentType
from omniread.html.scraper import HTMLScraper
from omniread.pdf.client import FileSystemPDFClient
from omniread.pdf.scraper import PDFScraper

MOCK_HTML_DIR = Path(__file__).parent / "mocks" / "html"
MOCK_PDF_DIR = Path(__file__).parent / "mocks" / "pdf"

# URL paths served by ``mock_transport``; anything else gets a 404.
_HTML_ROUTES = ("/simple", "/table")

# Logical identifiers that ``MockPDFClient`` maps onto fixture files.
_PDF_ROUTES = ("simple",)


def render_html(template_path, data_path) -> bytes:
    """Render a Jinja template file with variables taken from a JSON file.

    Args:
        template_path: Path to the template source, read as UTF-8 text.
        data_path: Path to a JSON document, read as UTF-8 text. Its decoded
            value is unpacked as keyword arguments to ``render``, so the
            top level must be a JSON object.

    Returns:
        The rendered document encoded as UTF-8 bytes.
    """
    template_text = Path(template_path).read_text(encoding="utf-8")
    data = json.loads(Path(data_path).read_text(encoding="utf-8"))

    # Autoescaping is deliberately off: the templates and data are test
    # fixtures from this repository and must be emitted verbatim.
    env = Environment(
        loader=BaseLoader(),
        autoescape=False,
    )
    template = env.from_string(template_text)
    return template.render(**data).encode("utf-8")


def mock_transport(request: httpx.Request) -> httpx.Response:
    """Handle a request for ``httpx.MockTransport``.

    Args:
        request: The outgoing request intercepted by the mock transport.

    Returns:
        A 200 response whose body is the rendered
        ``<endpoint>.html.jinja`` / ``<endpoint>.json`` pair from
        ``MOCK_HTML_DIR`` when the URL path is a known route, otherwise a
        404 response with body ``b"Not Found"``.
    """
    path = request.url.path
    if path not in _HTML_ROUTES:
        return httpx.Response(
            status_code=404,
            content=b"Not Found",
            request=request,
        )

    # The last path segment doubles as the fixture file stem.
    endpoint = path.split("/")[-1]
    content = render_html(
        MOCK_HTML_DIR / f"{endpoint}.html.jinja",
        MOCK_HTML_DIR / f"{endpoint}.json",
    )
    return httpx.Response(
        status_code=200,
        headers={"Content-Type": ContentType.HTML.value},
        content=content,
        request=request,
    )


@pytest.fixture
def http_scraper() -> Iterator[HTMLScraper]:
    """Yield an ``HTMLScraper`` whose HTTP client is served by ``mock_transport``.

    The underlying ``httpx.Client`` is closed during fixture teardown so it
    does not outlive the test that requested it.
    """
    transport = httpx.MockTransport(mock_transport)
    client = httpx.Client(transport=transport)
    try:
        yield HTMLScraper(client=client)
    finally:
        # Explicit close() rather than ``with client:`` so the client is not
        # entered here, in case the scraper manages the context itself.
        client.close()


class MockPDFClient(FileSystemPDFClient):
    """Test-only PDF client that routes logical identifiers to fixture files."""

    def fetch(self, source: str) -> bytes:
        """Fetch the fixture PDF registered under the identifier ``source``.

        Args:
            source: Logical identifier of a mock PDF (for example
                ``"simple"``), not a filesystem path.

        Returns:
            Whatever the parent ``fetch`` returns for
            ``MOCK_PDF_DIR / "<source>.pdf"`` (annotated as ``bytes``).

        Raises:
            FileNotFoundError: If ``source`` is not a known mock route.
        """
        if source not in _PDF_ROUTES:
            raise FileNotFoundError(f"No mock PDF route for '{source}'")

        fixture_path = MOCK_PDF_DIR / f"{source}.pdf"
        return super().fetch(fixture_path)


@pytest.fixture
def pdf_scraper() -> PDFScraper:
    """Return a ``PDFScraper`` backed by ``MockPDFClient``."""
    client = MockPDFClient()
    return PDFScraper(client=client)