Files
omniread/tests/test_pdf_simple.py
Vishesh 'ironeagle' Bangotra b2173f3ef0 refactor(tests): use omniread public API instead of internal module imports
- Replace deep imports with top-level omniread exports in tests
- Ensure tests validate only the supported public API surface
- Align HTML and PDF tests with documented library usage
2026-01-02 19:02:20 +05:30

42 lines
861 B
Python

from typing import Literal
from pydantic import BaseModel
from omniread import (
# core
Content,
# pdf
PDFParser,
)
class ParsedPDF(BaseModel):
    """Minimal parse result used by the PDF end-to-end test.

    Carries the size of the raw document and its leading magic bytes.
    """

    # Number of raw bytes in the parsed content.
    size_bytes: int
    # Leading signature of the document; annotated as the literal b"%PDF".
    magic: Literal[b"%PDF"]
class SimplePDFParser(PDFParser[ParsedPDF]):
    """Toy parser that exercises the public ``PDFParser`` interface.

    It does no real PDF parsing: it only checks the magic prefix of the raw
    content and reports how many bytes were received.
    """

    def parse(self) -> ParsedPDF:
        """Build a ``ParsedPDF`` from ``self.content.raw``.

        Returns:
            A ``ParsedPDF`` holding the raw byte count and the PDF magic.

        Raises:
            ValueError: If the raw content does not start with ``b"%PDF"``.
        """
        payload = self.content.raw
        # Happy path first: content carrying the PDF signature is accepted.
        if payload.startswith(b"%PDF"):
            return ParsedPDF(size_bytes=len(payload), magic=b"%PDF")
        raise ValueError("Not a valid PDF")
def test_end_to_end_pdf_simple(pdf_scraper):
    """Fetch the "simple" PDF via the scraper fixture, then parse it."""
    # Scrape stage: the "simple" identifier is routed in conftest.
    fetched: Content = pdf_scraper.fetch("simple")
    assert fetched.raw.startswith(b"%PDF")

    # Parse stage: run the minimal parser over the fetched content.
    parsed = SimplePDFParser(fetched).parse()
    assert parsed.magic == b"%PDF"
    assert parsed.size_bytes > 100