feat(pdf): add PDF client, scraper, parser, and end-to-end tests

- Introduce PDF submodule with client, scraper, and generic parser
- Add filesystem PDF client and test-only mock routing
- Add end-to-end PDF scrape → parse tests with typed output
- Mirror HTML module architecture for consistency
- Expose PDF primitives via omniread public API
This commit is contained in:
2026-01-02 18:59:36 +05:30
parent 390eb22e1b
commit de67c7b0b1
8 changed files with 198 additions and 0 deletions

View File

@@ -1,5 +1,6 @@
from .core import Content, ContentType
from .html import HTMLScraper, HTMLParser
from .pdf import FileSystemPDFClient, PDFScraper, PDFParser
__all__ = [
# core
@@ -9,4 +10,9 @@ __all__ = [
# html
"HTMLScraper",
"HTMLParser",
# pdf
"FileSystemPDFClient",
"PDFScraper",
"PDFParser",
]